0

I am working on small idea to collect errors from pages and to store them in DB and then use graph API to display information visually.

There is 8 sites and on each of them there is 100 entries - so 800 transactions per time. I loop through each site and then sub-loop through table of errors and collect them.

I got it working if I make insert query on each of those sub-loops for all 800 entries but I am getting some sort of memory leak from so many transactions and after few minutes - Node breaks due to memory exceeding.

So I tried queuing all 800 entries into Array of Arrays and then performing multi-insert at the end of every iteration but I am getting ER_PARSE_ERROR.

var tabletojson = require('tabletojson');
var mysql = require("mysql");
var striptag = require("striptags");
var fs = require("fs");
var path = require('path');

var startCollector;
var iterations = 0;
var insertions = 0;
var duplicated = 0;

var datas = [];

var clients = ["ClientA", "ClientB", "ClientC", "ClientD", "ClientE", "ClientF", "ClientG", "ClientH"];
var appDir = path.dirname(require.main.filename);

var errorList = ["err1", "err2", "err3", "err4", "err5", "err6"];

var con = mysql.createPool({
    host: "localhost",
    user: "User",
    password: "Password",
    database: "errors"
  });

function CollectErrors() {
    startCollector = new Date();
    for(var a = 0; a < clients.length; a++) {
        (function(a) {
            tabletojson.convertUrl("http://example.com" + clients[a] + "/page.php?limit=100", { stripHtmlFromCells: false }, function(response) {
            var rs = response[0];
                for(var l = rs.length-1; l > -1; l--) {
                    var newDate = formatDate(striptag(rs[l]["Date"]), striptag(rs[l]["Time"]));
                    var user = getUser(striptag(rs[l]["User"]));
                    var msg = striptag(rs[l]["Error"]);
                    var splitError = rs[l]["Error"].split("<a href=\"");
                    var link = getUrl(splitError[1]);
                    var id = getId(link);
                    var type = getType(striptag(splitError[0]));
                    var temp = [newDate, link, type, user, clients[a], id, msg];
                    datas.push(temp);
                }
                });
        })(a);
    }
    con.getConnection(function(err, connection) {
        connection.query("INSERT IGNORE INTO entries (time, url, type, author, client, uid, message) VALUES ?", [datas], function(err, rows) {
            console.log(err);
        });
        connection.release();
        datas = [];
    });
    setTimeout(CollectErrors, 10000);

}



function formatDate(date, time) {
    var newdate = date.split("/").reverse().join("-");
    var newtime = time+":00";
    return newdate + " " + newtime;
}

function getUrl(uri) {
    return "http://example.com/"+uri.split("\">Details")[0];
}

function getId(url) {
    return decodeURIComponent((new RegExp('[?|&]' + "id" + '=' + '([^&;]+?)(&|#|;|$)').exec(url) || [null, ''])[1].replace(/\+/g, '%20')) || null;
}

function getType(error) {
    for(var a = 0; a < errorList.length; a++) {
        if(error.indexOf(errorList[a]) !== -1) {
            return errorList[a];
        }
    }
    return "Other";
}

function getUser(user) {
    if(user == "" || user == "&#xA0;" || user == null) {
        return "System";
    }
    return user;
}


CollectErrors();

I've tried mysql.createConnection too but that also gave me same issue.

I've been stuck for past 12 hours and I can't see what's wrong, I've even tried populating Datas table with just strings but got same error.

1 Answer 1

2

I've changed your code to use ES6 and correct modules features.
Useful links: correct pooling with mysql, correct insert query, async/await, IIFE, enhanced object

const tabletojson = require('tabletojson'),
  mysql = require("mysql"),
  striptag = require("striptags"),
  fs = require("fs"),
  path = require('path');

const startCollector,
  iterations = 0,
  insertions = 0,
  duplicated = 0;

let datas = [];

const clients = ["ClientA", "ClientB", "ClientC", "ClientD", "ClientE", "ClientF", "ClientG", "ClientH"];
const appDir = path.dirname(require.main.filename);

const errorList = ["err1", "err2", "err3", "err4", "err5", "err6"];

const con = mysql.createPool({
  host: "localhost",
  user: "User",
  password: "Password",
  database: "errors"
});
// We'll use async/await from ES6
const collectErrors = async() => {
  // Up to here I've only changed syntax to ES6
  let startCollector = new Date();

  // We'll try to iterate through each client. And we use here for..of syntax to allow us using await
  for (let client of clients) {
    // Please, check that client value return correct data. If not, change for..of to your for..each and client variable to clients[a]
    const tbj = await tabletojson.convertUrl("http://example.com" + client + "/page.php?limit=100", {
      stripHtmlFromCells: false
    });
    const result = tgj[0];
    for (rs of result) {
      // I can't check this part, but I hope your example was with correct values.
      let newDate = formatDate(striptag(rs[l]["Date"]), striptag(rs[l]["Time"]));
      let user = getUser(striptag(rs[l]["User"]));
      let link = getUrl(splitError[1]);
      let msg = striptag(rs[l]["Error"]);
      let id = getId(link);
      let splitError = rs[l]["Error"].split("<a href=\"");
      let getType = getType(striptag(splitError[0]));
      // ES6 enhanced object syntax
      datas.push({
        newDate,
        user,
        msg,
        id,
        splitError,
        link,
        getType,
        temp: [newDate, link, type, user, client, id, msg]
      });
    }
  }
  // OK, here we have fulfilled datas array. And we want to save it.
  con.getConnection((err, connection) => {
    // Please, notice, here I've changed your insert query to prepared statement.
    connection.query("INSERT IGNORE INTO entries SET ?", datas, (err, rows) => {
      console.log(err);
      connection.release();
      datas = [];
    });
  });
  // I don't see why do you need timeout here, so I've left it commented.
  // setTimeout(CollectErrors, 10000);
};

// Here your other methods go....

// And to call your async function we'll use IIFE
(async() => {
  await collectErrors();
})();

Probably there may be errors with mysql insert, but that's not for sure. If occurred, please write in comments and I'll help you with that.

Sign up to request clarification or add additional context in comments.

3 Comments

setTimeout is required as I need this script to run indefinitely every set interval.
@arleitiss look to npmjs.com/package/node-cron with this module you can handle your script starts in proficient way
Accepting this answer. I didn't use the full solution provided but mixed some parts. It works as required now.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.