How to print html source to console with phantomjs
Spend some time reading the documentation; it should be obvious afterwards.
// PhantomJS script: load a page and print its HTML source to stdout.
var page = require('webpage').create();

page.open('http://google.com', function (status) {
  // The callback receives 'success' or 'fail'; without this check a failed
  // load would silently print an empty document and exit with code 0.
  if (status !== 'success') {
    require('system').stderr.writeLine('Failed to load http://google.com');
    phantom.exit(1);
    // phantom.exit() does not stop script execution immediately.
    return;
  }

  // page.content is the full HTML source of the loaded page.
  console.log(page.content);
  phantom.exit();
});
How to print html source to console with phantom-crawler
Get the content promise instead of the plainText promise.
The module phantom-crawler uses the module node-phantom-simple, which uses phantomjs.
You can find the list of properties you can call in the phantomjs wiki.
// Node.js script: crawl the queued URLs with phantom-crawler and print the
// HTML source of each page to the console.
var Crawler = require('phantom-crawler');

// Can be initialized with optional options object
var crawler = new Crawler();

// queue is an array of URLs to be crawled
crawler.queue.push('https://google.com/');
// Can also do `crawler.fetch(url)` instead of pushing it and crawling it

// Log a failure and mark the process as failed without throwing again,
// so the rest of the chain (the phantomjs shutdown) still runs.
function reportFailure(err) {
  console.error(err);
  process.exitCode = 1;
}

// Kill the phantomjs bridge; otherwise the child process keeps running.
function exitPhantom() {
  return crawler.phantom.then(function (p) {
    p.exit();
  });
}

// Extract the HTML content out of each phantomjs page
Promise.all(crawler.crawl())
  .then(function (pages) {
    var allHtml = pages.map(function (page) {
      var closePage = function () {
        page.close();
      };
      // suffix Promise to return promises instead of callbacks
      var html = page.getPromise('content');
      // Pages are like tabs, they should be closed - whether or not the
      // read succeeded. Handling both outcomes here also keeps this derived
      // promise from producing an unhandled rejection.
      html.then(closePage, closePage);
      return html;
    });
    return Promise.all(allHtml);
  })
  .then(function (allHtml) {
    // allHtml = array of HTML sources, one per crawled page
    // also supports ajax requests
    console.log(allHtml);
  })
  .catch(reportFailure)
  // Runs after success and after a reported failure alike.
  .then(exitPhantom)
  .catch(reportFailure);
Get javascript rendered html source using phantomjs
Unfortunately, that is not possible using just the PhantomJS command line. You have to use a JavaScript file to actually accomplish anything with PhantomJS.
Here is a very simple version of the script you can use
Code mostly copied from https://stackoverflow.com/a/12469284/4499924
printSource.js
// printSource.js - PhantomJS script that prints the rendered HTML source of
// the URL given on the command line.
// Usage: phantomjs printSource.js <url>
var system = require('system');
var page = require('webpage').create();

// system.args[0] is the filename, so system.args[1] is the first real argument
var url = system.args[1];

if (!url) {
  // Without a URL there is nothing to open; fail fast with a usage hint.
  system.stderr.writeLine('Usage: phantomjs printSource.js <url>');
  phantom.exit(1);
} else {
  // render the page, and run the callback function
  page.open(url, function (status) {
    // status is 'success' or 'fail'; report failures on stderr so that
    // redirected output (e.g. `> ember.html`) is not polluted.
    if (status !== 'success') {
      system.stderr.writeLine('Failed to load ' + url);
      phantom.exit(1);
      // phantom.exit() does not stop script execution immediately.
      return;
    }

    // page.content is the source
    console.log(page.content);
    // need to call phantom.exit() to prevent from hanging
    phantom.exit();
  });
}
To print the page source to standard output:
phantomjs printSource.js http://todomvc.com/examples/emberjs/
To save the page source in a file
phantomjs printSource.js http://todomvc.com/examples/emberjs/ > ember.html
Related Topics
Make Floating Divs The Same Height
Question Mark Characters Display Within Text. Why Is This
How to Use Bootstrap-Theme.CSS with Bootstrap 3
Border Around Tr Element Doesn't Show
CSS Selector for No-Children-But-Not-Empty
How to Align a Label to The "Bottom" of a Div in CSS
Difference Between HTML Link Media and CSS Media Queries
Use The HTML <Img> Tag as a Background Image Instead of The CSS Background-Image Property
Is It Possible Put Image in Input Type="Check Box"
Always Show Vertical Scrollbar in <Select>
How to Prevent Browser from Caching Form Fields
How to Add a Google Search Box to My Website
How to Create Curved & Overlapping Menu Tabs in CSS
Google Maps Height 100% of Div Parent