I am working on a project where I'm scraping stock prices.
I am able to get the stock price in the console (from pageScraper.js file), but it's refusing to display in the DOM.
I keep getting an error that says "module not defined". I am asking because after hours of research, I give up and need some advice.
Here's my code - feel free to use it if you're interested in the scraping part.
index.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<script src="pageScraper.js">
</script>
<title>Document</title>
</head>
<body>
<div id="demo"></div>
</body>
</html>
browser.js
const puppeteer = require('puppeteer');
async function startBrowser() {
let browser;
try {
console.log('Opening the browser......');
browser = await puppeteer.launch({
headless: false,
args: ['--disable-setuid-sandbox'],
ignoreHTTPSErrors: true,
});
} catch (err) {
console.log('Could not create a browser instance => : ', err);
}
return browser;
}
module.exports = {
startBrowser,
};
index.js
const browserObject = require("./browser");
const scraperController = require("./pageController");
//Start the browser and create a browser instance
let browserInstance = browserObject.startBrowser();
// Pass the browser instance to the scraper controller
scraperController(browserInstance);
pageController.js
const pageScraper = require('./pageScraper');
async function scrapeAll(browserInstance) {
let browser;
try {
browser = await browserInstance;
await pageScraper.scraper(browser);
}
catch (err) {
console.log("Could not resolve the browser instance => ", err);
}
}
module.exports = (browserInstance) => scrapeAll(browserInstance)
pageScraper.js
const scraper = {
url: 'https://finance.yahoo.com/quote/TSLA/',
async scraper(browser) {
let page = await browser.newPage()
await page.goto(this.url);
console.log("page loaded");
for(var k = 1; k < 2000; k++){
var element = await page.waitForXPath("/html/body/div[1]/div/div/div[1]/div/div[2]/div/div/div[5]/div/div/div/div[3]/div[1]/div[1]/fin-streamer[1]")
var price = await page.evaluate(element => element.textContent, element);
console.log(price);
await page.waitForTimeout(1000);
let demo = document.getElementById("demo")
let displayPrice = "";
displayPrice = "<p>" + price + "</p>"
demo.innerHTML = displayPrice;
}
await browser.close();
},
};
module.exports = scraper;
...and package.json
{
"name": "wspuppeteer",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"babel": "^6.23.0",
"puppeteer": "^13.5.1"
},
"devDependencies": {
"@babel/core": "^7.17.8",
"webpack": "^5.70.0"
},
"description": ""
}
What's causing the problem?
startBrowser()is async, it returns a Promise. You need to await it withlet browserInstance = await browserObject.startBrowser();node-fetchandcheerio). In Puppeteer you also have a possibility to wait some time after the page was loaded, then you'll to be sure that all js files was loaded.