I request to all NodeJS/python experienced players please look at my issue. Please at least direct me to the path where i can solve this issue.
My python script ./webextraction.py at backend processes the 3 arguments (urls csv file or string, keywords and boolean checkbox) which the nodejs (app.js) sent from the client side. But the issue is that the python script is using webdriver.Chrome which first writes some instructions like this below in the command prompt
Also, in my nodejs script i am succesfully sent the above said system arguments to the python script and also get the output in the url localhost:4000/formsubmit, but the issue is that it prints only first line of the command prompt,
For example, see below
But i build code to see the processed csv string instead of this [WDM] - ====== WebDriver manager ======
Please see my nodejs code file app.js below and please guide me in this
/* csv to json */
const express = require("express"),
app = express(),
upload = require("express-fileupload"),
csvtojson = require("csvtojson");
var http = require('http');
var path = require("path");
var bodyParser = require('body-parser');
var helmet = require('helmet');
var rateLimit = require("express-rate-limit");
let csvData = "test";
app.use(upload());
var server = http.createServer(app);
const limiter = rateLimit({
windowMs: 15 * 60 * 1000, // 15 minutes
max: 100 // limit each IP to 100 requests per windowMs
});
app.use(bodyParser.urlencoded({extended: false}));
app.use(express.static(path.join(__dirname,'./Final')));
app.use(helmet());
app.use(limiter);
server.listen(process.env.PORT || 3000, function() {
console.log('server running on port 3000');
})
app.get('/', function(req, res){
res.sendFile(path.join(__dirname,'./index.html'));
});
// form submit request
app.post('/formsubmit', function(req, res){
/** convert req buffer into csv string ,
* "csvfile" is the name of my file given at name attribute in input tag */
csvData = req.files.csvfile.data.toString('utf8');
// Send request to python script
var spawn = require('child_process').spawn;
var process = spawn('python', ["./webextraction.py", csvData, req.body.keywords, req.body.full_search])
dataString = "";
process.stdout.on('data', function(data){
dataString += data.toString();
});
process.stdout.on('end', function(){
console.log(dataString);
});
process.stdin.end();
process.stderr.on('data', function(data){
res.send("<p>"+data.toString()+"</p>");
});
});
I have tried below code which receives data in chunks but still it is not working
process.stdout.on('data', chunk => chunks.push(chunk));
process.stdout.on('end', () => {
try {
// If JSON handle the data
const data = JSON.parse(Buffer.concat(chunks).toString());
console.log(data);
} catch (e) {
// Handle the error
console.log(result);
}
});
Below is the some part of the python code file ./webextraction.py which create csv file using the dataframe. This python code has csv string. Please note the input csv and output csv file can have more number of rows i.e. in 1000s
colList = ['Found urls', 'Not found urls','Error urls']
dframe = pd.DataFrame(columns = colList, dtype = str)
maxlen = get_max_of_list(found_results_A, found_keywords_list_changed, notfound_results, error_urls)
found_results_A = append_space(found_results_A, maxlen)
notfound_results = append_space(notfound_results, maxlen)
error_urls = append_space(error_urls, maxlen)
found_keywords_list_changed = append_space(found_keywords_list_changed, maxlen)
if(len(found_results_A) == maxlen and len(notfound_results) == maxlen and len(error_urls) == maxlen and len(found_keywords_list_changed) == maxlen):
dframe['Found urls'] = found_results_A
dframe['keywords'] = found_keywords_list_changed
dframe['Not found urls'] = notfound_results
dframe['Error urls'] = error_urls
dframe = dframe.sort_values(by=["Found urls"], ascending=False)
data = dframe.dropna(axis=0, how='all', thresh=None, subset=None, inplace=False)
export_csv = dframe.to_csv(encoding = 'ASCII', index = None, header = True)
print(export_csv)
Below file code is of index.html which send the content to nodejs
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Keyword searcher</title>
</head>
<body>
<div class="row">
<div class="col">
<form method="post" enctype="multipart/form-data" action="/formsubmit">
<label for="csvfile">Upload csv file (having URLs)</label><br>
<input type="file" name="csvfile" id="csvfile" accept=".csv"><br>
<p>Enter keywords to search (separated by comma(,))</p>
<input type="text" name="keywords" id="keywords"><br>
<label for="full_search">Full search</label>
<input type="checkbox" name="full_search" id="full_search"><br>
<button type="submit">Submit</button>
</form>
<div id="status"></div>
</div>
</div>
</body>
</html>


webdriver.Chrome, earlier i was using beautiful soup but that was not usefull if the website adds dynamic contents using some scripts, that's why i changed code towebdriver.Chrome