I am going to scrape some contents from a website that use javascript to load dynamic content. Before, I have used request and cheerio to scrape and they worked just fine. But I just find out that request and cheerio cannot scrape dynamic content. After do a research, I found phantomjs that can get all the content after the page has loaded. I have a problem with it now, I cannot use jQuery selector like I used to use in cheerio. This is my sample code but the selector is return nothing.
var page = require('webpage').create();
var url = 'http://angkorauto.com/vehicle';
page.open(url, function (status) {
if (status !== 'success') {
console.log('Unable to load the address!');
phantom.exit();
} else {
window.setTimeout(function () {
// console.log(page.content);
page.includeJs('https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min.js', function(){
page.evaluate(function(){
console.log($('.divTitle').find('a').attr('href'));
});
});
phantom.exit();
}, 1500);
}
});
Could you help me with this problem? I really get stuck now.
Thanks for ur time to help.