
I'm trying to create a file downloader as a background service, but when a large file is scheduled, it is first held entirely in memory and the file is only written to disk at the end of the download.

How can I make the file be written to disk gradually, preserving memory, considering that I may have many files being downloaded at the same time?

Here's the code I'm using:

var sys = require("sys"),
    http = require("http"),
    url = require("url"),
    path = require("path"),
    fs = require("fs"),
    events = require("events");

var downloadfile = "http://nodejs.org/dist/node-v0.2.6.tar.gz";

var host = url.parse(downloadfile).hostname;
var filename = url.parse(downloadfile).pathname.split("/").pop();

var theurl = http.createClient(80, host);
var requestUrl = downloadfile;
sys.puts("Downloading file: " + filename);
sys.puts("Before download request");
var request = theurl.request('GET', requestUrl, {"host": host});
request.end();

var dlprogress = 0;


setInterval(function () {
    sys.puts("Download progress: " + dlprogress + " bytes");
}, 1000);


request.addListener('response', function (response) {
    response.setEncoding('binary');
    sys.puts("File size: " + response.headers['content-length'] + " bytes.");
    var body = '';
    response.addListener('data', function (chunk) {
        dlprogress += chunk.length;
        body += chunk; // the whole file accumulates in memory here
    });
    response.addListener("end", function() {
        // only now is anything written to disk
        fs.writeFileSync(filename, body, 'binary');
        sys.puts("After download finished");
    });
});
  • Any chance you could share the final result? I'm looking for something like this... Commented Oct 7, 2011 at 17:40
  • I tried to implement a feature to follow 302 redirects but I don't think it's working properly. Maybe you could try. There it is: gist.github.com/1297063 Commented Oct 18, 2011 at 23:33
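
For reference, a minimal sketch of the redirect-following idea using only the core http and url modules (this is a hypothetical helper, not the code from the gist):

var http = require('http');
var url = require('url');

// hypothetical helper: issues a GET and follows 301/302 redirects
// by re-requesting the Location header
function getFollowingRedirects(target, callback) {
    var parsed = url.parse(target);
    http.get({ host: parsed.hostname, path: parsed.path }, function (res) {
        if ((res.statusCode === 301 || res.statusCode === 302) && res.headers.location) {
            getFollowingRedirects(res.headers.location, callback);
        } else {
            callback(res); // final response; stream it to disk from here
        }
    });
}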

6 Answers


I changed the callback to:

request.addListener('response', function (response) {
    // open a write stream and flush each chunk to disk as it arrives
    var downloadfile = fs.createWriteStream(filename, {'flags': 'a'});
    sys.puts("File size " + filename + ": " + response.headers['content-length'] + " bytes.");
    response.addListener('data', function (chunk) {
        dlprogress += chunk.length;
        downloadfile.write(chunk, 'binary');
    });
    response.addListener("end", function() {
        downloadfile.end();
        sys.puts("Finished downloading " + filename);
    });
});

This worked perfectly.


2 Comments

  • Aren't we supposed to prefer setEncoding(null) instead of 'binary'?
  • {'flags': 'a'} will append the data to the file if it already exists.

Does the request package work for your uses?

It lets you do things like this:

var request = require('request'); // npm install request
var fs = require('fs');

request(downloadurl).pipe(fs.createWriteStream(downloadtohere));
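
As a commenter notes below, the same streaming approach also works with no dependencies, by piping the response from the core http module directly (a minimal sketch; the host, path, and filename are placeholders):

var http = require('http');
var fs = require('fs');

http.get({ host: 'example.com', path: '/file.tar.gz' }, function (res) {
    // pipe() handles backpressure, so large files never sit in memory
    res.pipe(fs.createWriteStream('file.tar.gz'));
});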

2 Comments

  • You don't even need request for this. Just pipe res from http.get or whatever is being used.
  • It crashes at exactly 4 GB. Any idea why, and how can we let it download large files of up to 10 GB?

Take a look at http-request:

var http = require('http-request'); // the http-request package, not the core http module

// shorthand syntax, buffered response
http.get('http://localhost/get', function (err, res) {
    if (err) throw err;
    console.log(res.code, res.headers, res.buffer.toString());
});

// save the response to 'myfile.bin' with a progress callback
http.get({
    url: 'http://localhost/get',
    progress: function (current, total) {
        console.log('downloaded %d bytes from %d', current, total);
    }
}, 'myfile.bin', function (err, res) {
    if (err) throw err;
    console.log(res.code, res.headers, res.file);
});


When downloading a large file, use fs.write and not fs.writeFile, as writeFile would overwrite the previously written content on each call.

function downloadfile(res) {
    // 'options', 'sendstatus' and 'sendendstatus' are assumed to be defined elsewhere
    var size = 0;
    var requestserver = http.request(options, function(r) {
        console.log('STATUS: ' + r.statusCode);
        console.log('HEADERS: ' + JSON.stringify(r.headers));

        var fd = fs.openSync('sai.tar.gz', 'w');

        r.on('data', function (chunk) {
            size += chunk.length;
            console.log(size + ' bytes received');
            sendstatus(res, size);
            fs.write(fd, chunk, 0, chunk.length, null, function(er, written) {
            });
        });
        r.on('end', function() {
            console.log('\nended from server');
            fs.closeSync(fd);
            sendendstatus(res);
        });
    });
    requestserver.end(); // actually send the request
}
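
As the comments below point out, firing fs.write repeatedly without waiting for each callback risks out-of-order writes; a safer sketch of the same handler using a WriteStream (options, sendstatus and sendendstatus assumed to be defined as above):

function downloadfileStreamed(res) {
    var size = 0;
    var requestserver = http.request(options, function (r) {
        var out = fs.createWriteStream('sai.tar.gz');
        r.on('data', function (chunk) {
            size += chunk.length;
            sendstatus(res, size);
            out.write(chunk); // the stream queues writes in order internally
        });
        r.on('end', function () {
            out.end();
            sendendstatus(res);
        });
    });
    requestserver.end();
}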

2 Comments

  • fs.write isn't safe if you don't wait for the callback. You should use a WriteStream.
  • Far better just to pipe res to the writable file stream.

Instead of holding the content in memory in the "data" event listener, you should write to the file in append mode.
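
For instance, a minimal sketch of that idea, using a write stream opened in append mode (the host, path, and filename here are placeholders):

var http = require('http');
var fs = require('fs');

// open the file in append mode so each chunk is flushed to disk as it arrives
var out = fs.createWriteStream('file.bin', { flags: 'a' });

http.get({ host: 'example.com', path: '/file.bin' }, function (res) {
    res.on('data', function (chunk) {
        out.write(chunk);
    });
    res.on('end', function () {
        out.end();
    });
});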


Use streams, as Carter Cole suggested. Here is a more complete example:

var inspect = require('eyespect').inspector();
var request = require('request');
var filed = require('filed');
var temp = require('temp');
var downloadURL = 'http://upload.wikimedia.org/wikipedia/commons/e/ec/Hazard_Creek_Kayaker.JPG';
var downloadPath = temp.path({prefix: 'singlePageRaw', suffix: '.jpg'});

var downloadFile = filed(downloadPath);
var r = request(downloadURL);
r.pipe(downloadFile);

// listen on the request stream, not on the pipe() return value
r.on('data', function(data) {
  inspect('binary data received');
});
downloadFile.on('end', function () {
  inspect(downloadPath, 'file downloaded to path');
});

downloadFile.on('error', function (err) {
  inspect(err, 'error downloading file');
});

You may need to install the modules first, which you can do via npm install filed request eyespect temp
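
As the comment below notes, the extra modules are not essential; a leaner sketch of the same flow using only request and the core fs module (the output path is a placeholder):

var request = require('request');
var fs = require('fs');

var downloadURL = 'http://upload.wikimedia.org/wikipedia/commons/e/ec/Hazard_Creek_Kayaker.JPG';
var out = fs.createWriteStream('/tmp/Hazard_Creek_Kayaker.JPG');

request(downloadURL)
    .on('error', function (err) {
        console.error('error downloading file', err);
    })
    .pipe(out);

out.on('finish', function () {
    console.log('file downloaded');
});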

1 Comment

  • There is no reason to use eyespect, filed, or temp. The example is good, but looks bloated.
