0

I request to all NodeJS/python experienced players please look at my issue. Please at least direct me to the path where i can solve this issue.

My python script ./webextraction.py at backend processes the 3 arguments (urls csv file or string, keywords and boolean checkbox) which the nodejs (app.js) sent from the client side. But the issue is that the python script is using webdriver.Chrome which first writes some instructions like this below in the command prompt

picture1

Also, in my nodejs script i am succesfully sent the above said system arguments to the python script and also get the output in the url localhost:4000/formsubmit, but the issue is that it prints only first line of the command prompt,

For example, see below

webpage

But i build code to see the processed csv string instead of this [WDM] - ====== WebDriver manager ======

Please see my nodejs code file app.js below and please guide me in this

/* csv to json */
const express = require("express"),
  app = express(),
  upload = require("express-fileupload"),
  csvtojson = require("csvtojson");

var http = require('http');
var path = require("path");
var bodyParser = require('body-parser');
var helmet = require('helmet');
var rateLimit = require("express-rate-limit");

let csvData = "test";
app.use(upload());

var server = http.createServer(app);

const limiter = rateLimit({
  windowMs: 15 * 60 * 1000, // 15 minutes
  max: 100 // limit each IP to 100 requests per windowMs
});

app.use(bodyParser.urlencoded({extended: false}));
app.use(express.static(path.join(__dirname,'./Final')));
app.use(helmet());
app.use(limiter);

server.listen(process.env.PORT || 3000, function() { 
    console.log('server running on port 3000'); 
})

app.get('/', function(req, res){
    res.sendFile(path.join(__dirname,'./index.html'));
});

// form submit request
app.post('/formsubmit', function(req, res){

    /** convert req buffer into csv string , 
    *   "csvfile" is the name of my file given at name attribute in input tag */
    csvData = req.files.csvfile.data.toString('utf8');

    // Send request to python script
    var spawn = require('child_process').spawn;
    var process = spawn('python', ["./webextraction.py", csvData, req.body.keywords, req.body.full_search])

    dataString = "";

    process.stdout.on('data', function(data){
    dataString += data.toString();
    });

    process.stdout.on('end', function(){
    console.log(dataString);
    });
    
    process.stdin.end();

    process.stderr.on('data', function(data){
        res.send("<p>"+data.toString()+"</p>");
    });

});

I have tried below code which receives data in chunks but still it is not working

    process.stdout.on('data', chunk => chunks.push(chunk));

    process.stdout.on('end', () => {

        try {
            // If JSON handle the data
            const data = JSON.parse(Buffer.concat(chunks).toString());

            console.log(data);

        } catch (e) {
            // Handle the error
            console.log(result);
        }
    });

Below is the some part of the python code file ./webextraction.py which create csv file using the dataframe. This python code has csv string. Please note the input csv and output csv file can have more number of rows i.e. in 1000s

    colList = ['Found urls', 'Not found urls','Error urls']
    dframe = pd.DataFrame(columns = colList, dtype = str)

    maxlen = get_max_of_list(found_results_A, found_keywords_list_changed, notfound_results, error_urls)

    found_results_A = append_space(found_results_A, maxlen)
    notfound_results = append_space(notfound_results, maxlen)
    error_urls = append_space(error_urls, maxlen)
    found_keywords_list_changed = append_space(found_keywords_list_changed, maxlen)

    if(len(found_results_A) == maxlen and len(notfound_results) == maxlen and len(error_urls) == maxlen and len(found_keywords_list_changed) == maxlen):    
        dframe['Found urls'] = found_results_A
        dframe['keywords'] = found_keywords_list_changed
        dframe['Not found urls'] = notfound_results
        dframe['Error urls'] = error_urls

    dframe = dframe.sort_values(by=["Found urls"], ascending=False)
    data = dframe.dropna(axis=0, how='all', thresh=None, subset=None, inplace=False)
    export_csv = dframe.to_csv(encoding = 'ASCII', index = None, header = True)
    print(export_csv)

Below file code is of index.html which send the content to nodejs

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Keyword searcher</title>
</head>
<body>
    <div class="row">
        <div class="col">
            
            <form method="post" enctype="multipart/form-data" action="/formsubmit">
                <label for="csvfile">Upload csv file (having URLs)</label><br>
                <input type="file" name="csvfile" id="csvfile" accept=".csv"><br>
                <p>Enter keywords to search (separated by comma(,))</p>
                <input type="text" name="keywords" id="keywords"><br>
                <label for="full_search">Full search</label>
                <input type="checkbox" name="full_search" id="full_search"><br>
                <button type="submit">Submit</button>
            </form>

            <div id="status"></div>

        </div>
    </div>    
</body>
</html>
3
  • I am also thinking to make GUI using Tkinter Python but still i think this issue will remain becoz of the webdriver.Chrome, earlier i was using beautiful soup but that was not usefull if the website adds dynamic contents using some scripts, that's why i changed code to webdriver.Chrome Commented Feb 17, 2021 at 6:06
  • Can this will solve me issue stackoverflow.com/questions/51352274/… Commented Feb 17, 2021 at 7:14
  • I have actually no idea how to serialize the output message printed on the command prompt, becoz i need only the final csv string as output of the python script Commented Feb 17, 2021 at 7:15

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.