Trying to make a simple Tumblr scraper using node.js
var request = require('request');
var fs = require('fs');
var apiKey = 'my-key-here';
var offset = 0;
for (var i=0; i<5; i++) {
console.log('request #' + i + '...');
var requestURL = 'http://api.tumblr.com/v2/blog/blog.tumblr.com/posts/text?api_key='
+ apiKey
+ '&offset='
+ offset;
console.log(requestURL);
request(requestURL, function(error, response, body) {
if (!error && response.statusCode == 200) {
var resultAsJSON = JSON.parse(body);
resultAsJSON.response.posts.forEach(function(obj) {
fs.appendFile('content.txt', offset + ' ' + obj.title + '\n', function (err) {
if (err) return console.log(err);
});
offset++;
});
}
});
}
By default, the API only returns a maximum of 20 latest posts. I want to grab all the posts instead. As a test, I want to get the latest 100 first, hence the i<5in the loop declaration.
The trick to do it is to use the offset parameter. Given an offset value of 20, for example, the API will not return the latest 20, but instead returns posts starting from the 21st from the top.
As I can't be sure that the API will always return 20 posts, I am using offset++ to get the correct offset number.
The code above works, but console.log(requestURL) returns http://api.tumblr.com/v2/blog/blog.tumblr.com/posts/text?api_key=my-key-here&offset=0
five times.
So my question is, why does the offset value in my requestURL remains as 0, even though I have added offset++?
offsetis zero for all of them. You need an asynchronous for-each loop.offsetvariable inappendFile, and they showed up correctly in the text file from 0 to 99.