How to Scrape Pages with Dynamic Content Using Node.js

How can I scrape pages with dynamic content using node.js?

Here you go; this uses PhantomJS (via the phantom npm package) to render the page, inject jQuery, and collect the link hrefs from the rendered DOM:

var phantom = require('phantom');

phantom.create(function (ph) {
    ph.createPage(function (page) {
        var url = "http://www.bdtong.co.kr/index.php?c_category=C02";
        page.open(url, function () {
            // Inject jQuery into the rendered page so the DOM can be queried with it.
            page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
                page.evaluate(function () {
                    // This runs inside the PhantomJS page context; collect the hrefs
                    // and return them so they reach the Node callback below.
                    var links = [];
                    $('.listMain > li').each(function () {
                        links.push($(this).find('a').attr('href'));
                    });
                    return links;
                }, function (links) {
                    console.log(links);
                    ph.exit();
                });
            });
        });
    });
});
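
PhantomJS is no longer maintained, so a new project today would usually do the same scrape with Puppeteer, which the next answer also uses. Here is a minimal sketch of the equivalent Puppeteer version, assuming the same URL and the same .listMain > li selector:

const puppeteer = require('puppeteer');

(async () => {
    const browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();

    // Wait until the network is idle so dynamically loaded content is in the DOM.
    await page.goto('http://www.bdtong.co.kr/index.php?c_category=C02', { waitUntil: 'networkidle0' });

    // $$eval runs the callback in the page context and returns the result to Node.
    const links = await page.$$eval('.listMain > li a', anchors => anchors.map(a => a.href));
    console.log(links);

    await browser.close();
})();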

Scrape a dynamic site using Puppeteer

Everything is correct. All you have to do is await your getPage call, since it is async. Try this:

.get("/:profile", async (req, res, next) => {

    const url = "https://trailblazer.me/id/hverma99";

    // Launch headless Chrome, load the page, and return its HTML
    // once the network has gone idle (i.e. dynamic content has loaded).
    async function getPage(url) {
        const browser = await puppeteer.launch({ headless: true });
        const page = await browser.newPage();
        await page.goto(url, { waitUntil: 'networkidle0' });

        const html = await page.content(); // serialized HTML of the page DOM
        await browser.close();
        return html;
    }

    const html = await getPage(url); // await it, because getPage is async
    const $ = cheerio.load(html);
    const span = $('.tds-tally__count.tds-tally__count_success');
    console.log(span.text());
    res.send(span.text()); // respond so the request doesn't hang

});

Also, the route handler itself needs to be async, like this: async (req, res, next)
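
The snippet above is only the route handler; here is a minimal sketch of how it might be wired up, assuming an Express app with the puppeteer and cheerio packages installed (the port and file layout are assumptions):

const express = require('express');
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');

const app = express();

app.get("/:profile", async (req, res, next) => {
    // ... handler body from the snippet above ...
    // req.params.profile is available here if you want to build the URL from it.
});

app.listen(3000, () => console.log('listening on port 3000'));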

How to scrape a website that is dynamically rendered with filters?

I think the easiest way to do this would be to take a look at the Network tab in your browser's developer tools. My suspicion is that the website loads this data dynamically from some sort of API, so you actually need to scrape the incoming data from that API rather than the website itself.
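
For example (purely illustrative: the endpoint URL and JSON field names below are assumptions, not the real site's API), once you spot the request in the Network tab you can call it directly instead of rendering the page:

// Requires Node 18+ for the built-in fetch; on older Node, use a library such as node-fetch.
(async () => {
    // Hypothetical endpoint copied from the browser's Network tab.
    const response = await fetch('https://example.com/api/items?filter=popular&page=1');
    const data = await response.json();

    // Field names here are assumptions; inspect the actual JSON to see what it contains.
    data.items.forEach(item => console.log(item.title));
})();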


