You can use the request and cheerio modules in Node.js to load page contents and extract the bodies you need.
let request = require('request'),
cheerio = require('cheerio'),
q = require('q')
First, you need a function that takes a URL and loads the page:
/**
 * Loads the HTML body of a page.
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<string>} Resolves with the response body; rejects with an
 *   Error on network failure, timeout, or a non-2xx HTTP status.
 */
function loadPage(url) {
    var deferred = q.defer();
    request.get({
            url: url,
            timeout: 60000 // 60s — request rejects with ETIMEDOUT past this
        },
        function (error, response, body) {
            if (error) {
                deferred.reject(error);
            }
            // The original resolved ANY non-error response, so 404/500
            // error pages were silently treated as valid content.
            else if (response.statusCode < 200 || response.statusCode >= 300) {
                deferred.reject(new Error('Request failed with status ' + response.statusCode));
            }
            else {
                deferred.resolve(body);
            }
        }
    );
    return deferred.promise;
}
Next, you need to find all the links in the body. For that, use a function like this:
/**
 * Extracts the href of every anchor in an HTML document.
 * @param {string} body - Raw HTML to parse.
 * @returns {Promise<string[]>} Resolves with the list of href values.
 */
function extractLinks(body) {
    var $ = cheerio.load(body);
    var links = [];
    // Only anchors that actually carry an href; narrow this selector
    // down to the specific links you care about for your site.
    $('a[href]').each(function () {
        links.push($(this).attr('href'));
    });
    // The work is synchronous, so wrap the result directly instead of
    // going through q.defer() (deferred-for-sync-value anti-pattern).
    return q.resolve(links);
}
Then you must load the body of each item in the links array. To do that, call loadPage again, and use another cheerio-based function to extract your data from the new pages.
// Chain the stages flatly and RETURN each promise: with the original's
// nested .then, rejections from extractLinks/q.all never reached the
// single .catch at the bottom.
loadPage(firstUrl)
    .then(function (body) {
        return extractLinks(body);
    })
    .then(function (links) {
        // Start every fetch, then wait for all of them ONCE. The original
        // called q.all(arr).then() inside the loop, firing it on every
        // iteration with a partial array (and used for...in on an array).
        var pagePromises = links.map(function (link) {
            return loadPage(link);
        });
        return q.all(pagePromises);
    })
    .then(function (pages) {
        // pages[i] is the HTML body of links[i]; extract your data here.
    })
    .catch(function (error) {
        console.log(error);
    });
// FIXME(review): stray statement, apparently unrelated to the scraping flow
// above — `$` is not defined at this scope, and cheerio has no event system,
// so .trigger('click') only makes sense in a browser/jQuery context.
$('.agent-info').find('a').trigger('click')