0

I'm trying to run a function and once that function is complete, then run another function. The first function reads a CSV file, makes a GET request, and builds an object. The second function uses that newly created object to create a new CSV file.

The problem I'm having is that the new CSV file is being created prior to the GET requests finishing.

I'm using async.parallel to control the flow, but I'm not able to get the logic right.

I'd love to know what I'm doing wrong and better understand how node thinks about these tasks.

// Require
var request = require('request');
var fs = require('fs');
var json2csv = require('json2csv');
var csv = require('csv');
var async = require('async');

// Params
var emailHunter_apiKey = '0000';
// Shared accumulator: every request callback below pushes {email: ...} objects into it.
var emails = [];
// Column list handed to json2csv.
var fields = ['email'];
// Row counter; the `i < 5` guard below limits the lookups to the first 5 iterated rows.
var i = 0;

// Start
// Intended flow: task (read CSV + GET requests) -> final callback (write export.csv).
// NOTE(review): as written, the final callback runs before any request has finished,
// because the task signals completion synchronously (see the callback(null, emails) line).
async.parallel([
        // The single task: parse file.csv and start one GET request per qualifying row.
        function(callback){
            setTimeout(function(){
                var file = fs.readFileSync('file.csv');
                // csv.parse delivers the rows to the callback passed as its last argument.
                csv.parse(file, {delimiter: ','}, function (err, data) {
                    // NOTE(review): `err` is not checked here.
                    for (var key in data) {
                        if (i < 5) {
                            if (data.hasOwnProperty(key)) {
                                var h = data[key];
                                // Column index 5 is used as the URL/domain to look up.
                                if (h[5] != '') {
                                    var url = h[5];
                                    // Only the first '//' and the first 'www.' are removed
                                    // (String.prototype.replace with a string pattern).
                                    url = url.replace('//', '');
                                    url = url.replace('www.', '');
                                    // The request is only started here; its callback runs later.
                                    request('https://api.emailhunter.co/v1/search?domain=' + url + '&api_key=' + emailHunter_apiKey + '', function (error, response, body) {
                                        if (!error && response.statusCode == 200) {
                                            var json = JSON.parse(body);
                                            // Looks for the 'emails' property of the parsed response.
                                            for (var subObj in json) {
                                                if (json.hasOwnProperty(subObj) && subObj == 'emails') {
                                                    var emailObj = json[subObj];
                                                    // This `var key` is a separate variable from the outer
                                                    // loop's `key` (it belongs to the request callback's scope).
                                                    for (var key in emailObj) {
                                                        var email = {
                                                            'email': emailObj[key]['value']
                                                        };
                                                        emails.push(email);
                                                    }
                                                }
                                            }
                                        }
                                    });
                                }
                            }
                        }
                        i++;
                    }
                });
                // This call sits outside the csv.parse callback and outside every request
                // callback, so it executes right after csv.parse() is invoked and does not
                // wait for the requests that fill `emails`.
                callback(null, emails);
            }, 200);
            // Runs when the task is invoked, before the 200 ms timer fires.
            console.log(emails);
        }
    ],
    // Final callback: converts the results to CSV and writes export.csv.
    function(err, results){
        // NOTE(review): per async.parallel's contract `results` should hold one entry per
        // task, i.e. [emails] rather than emails itself — confirm against the async docs.
        json2csv({data: results, fields: fields}, function (err, csv) {
            // A conversion error is logged, but the write below still happens.
            if (err) console.log(err);
            fs.writeFile('export.csv', csv, function (err) {
                if (err) throw err;
                console.log('file saved');
            });
        });
        console.log(results);
    });
2
  • Firstly, you're using async.parallel wrong, since you only have 1 function in its 1st argument. It's supposed to take an array of (multiple) functions. And secondly, I think Promises might be more suited for your need, look into those. Basically you're dependent on multiple executions of request (which is async) being finished, so you'll need to promisify it and check when all promises have been resolved. You'll learn more about it when you discover promises. Lastly, your code is a callback-hell which you should avoid for readability's sake. Commented May 21, 2015 at 3:38
  • Thanks for running through that. I'm definitely trying to wrap my head around the callback structure and understanding how node thinks/works. That makes sense about the promises suggestions too. Thanks again for the help! Commented May 21, 2015 at 18:20

1 Answer 1

0

As laggingreflex mentioned, you're using async incorrectly.

First, you should build an array of the functions that you want to execute in parallel, and then use async to execute them.

Furthermore, your callback was getting executed immediately because csv.parse() is an async function. Therefore node fires it immediately and then executes callback(). You need to move the callback inside of parse().

Try this...

// Params
var emailHunter_apiKey = '0000';
var emails = [];
var fields = ['email'];
var i = 0;
var functionsToRunAsync = [];

/**
 * Builds one task for async.parallel that looks up the e-mail addresses
 * of a single domain and appends them to the shared `emails` array.
 *
 * The domain is received as a parameter so that every task keeps its own
 * value; closing over a `var` declared in the loop would make all tasks
 * see the value from the last row.
 *
 * @param {string} domain - Domain to query.
 * @returns {function(function)} Task that calls its callback exactly once,
 *     after the request has completed (successfully or not).
 */
function makeLookupTask(domain) {
    return function (callback) {
        request('https://api.emailhunter.co/v1/search?domain=' + domain + '&api_key=' + emailHunter_apiKey + '', function (error, response, body) {
            if (!error && response.statusCode == 200) {
                var found;
                try {
                    found = JSON.parse(body).emails;
                } catch (parseError) {
                    // A malformed body only loses this domain's results.
                    console.log(parseError);
                }
                if (found && typeof found === 'object') {
                    Object.keys(found).forEach(function (entryKey) {
                        emails.push({'email': found[entryKey]['value']});
                    });
                }
            }

            // Signal completion once per task, on every path. A failed lookup is
            // treated as "no e-mails" so it does not abort the other lookups.
            callback();
        });
    };
}

var file = fs.readFileSync('file.csv');
csv.parse(file, {delimiter: ','}, function (err, data) {
    if (err) throw err;

    // Only the first 5 rows are looked up.
    for (i = 0; i < data.length && i < 5; i++) {
        var h = data[i];
        // Skip rows whose sixth column is missing or empty.
        if (h[5]) {
            var url = h[5];
            url = url.replace('//', '');
            url = url.replace('www.', '');

            // add a new function to an array, to be executed later
            functionsToRunAsync.push(makeLookupTask(url));
        }
    }

    // now that we have all of the functions in an array, we run them in parallel
    async.parallel(functionsToRunAsync, function (err) {    // all async functions complete
        if (err) console.log(err);

        // The tasks collect into `emails`; they pass no per-task result,
        // so the `results` argument of this callback carries no data.
        json2csv({data: emails, fields: fields}, function (err, output) {
            if (err) {
                console.log(err);
                return; // nothing valid to write
            }
            fs.writeFile('export.csv', output, function (err) {
                if (err) throw err;
                console.log('file saved');
            });
        });
        console.log(emails);
    });
});
Sign up to request clarification or add additional context in comments.

1 Comment

Thank you so much for running through that. That makes more sense and it worked. I like the idea of the array of parallel functions. I just started with node a few days ago, working on little scripts to learn.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.