0

I am working on a project where I'm scraping stock prices.

I am able to get the stock price in the console (from pageScraper.js file), but it's refusing to display in the DOM.

I keep getting an error that says "module not defined". I am asking because after hours of research, I give up and need some advice.

Here's my code - feel free to use it if you're interested in the scraping part.

index.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!--
      NOTE(review): pageScraper.js is written as a CommonJS module (it ends
      with `module.exports = scraper;`). Loaded through a classic script tag,
      the browser evaluates that line with no `module` binding in scope, so
      the assignment throws. The file is meant to be run by Node.js, not
      included in a page.
    -->
    <script src="pageScraper.js">
    </script>
     <title>Document</title>
</head>
<body>  
    <!-- Placeholder element with id "demo", intended to display the scraped price. -->
    <div id="demo"></div>
</body>
</html>

browser.js

const puppeteer = require('puppeteer');

/**
 * Launches a visible (non-headless) browser through puppeteer.
 *
 * Launch failures are not propagated: the error is logged and the returned
 * promise resolves to `undefined`, so callers must check the result.
 *
 * @returns {Promise<object|undefined>} The browser returned by
 *   `puppeteer.launch`, or `undefined` when the launch failed.
 */
async function startBrowser() {
  const launchOptions = {
    headless: false,
    args: ['--disable-setuid-sandbox'],
    ignoreHTTPSErrors: true,
  };

  try {
    console.log('Opening the browser......');
    return await puppeteer.launch(launchOptions);
  } catch (err) {
    console.log('Could not create a browser instance => : ', err);
    return undefined;
  }
}

module.exports = {
  startBrowser,
};

index.js

const browserObject = require("./browser");
const scraperController = require("./pageController");

// Begin launching the browser. startBrowser() is async, so this is a pending
// Promise rather than the browser itself.
const pendingBrowser = browserObject.startBrowser();

// Hand the pending browser to the scraper controller, which awaits it before
// scraping.
scraperController(pendingBrowser);

pageController.js

const pageScraper = require('./pageScraper');
/**
 * Resolves the (possibly still pending) browser and runs the page scraper on it.
 *
 * Never rejects: every failure is logged and the returned promise resolves to
 * `undefined`. Browser-resolution failures and scraping failures are reported
 * with distinct messages so the log points at the step that actually broke.
 *
 * @param {Promise<object>|object} browserInstance - A browser, or a promise
 *   for one, as produced by `startBrowser()`.
 * @returns {Promise<void>}
 */
async function scrapeAll(browserInstance) {
  let browser;
  try {
    browser = await browserInstance;
  } catch (err) {
    console.log("Could not resolve the browser instance => ", err);
    return;
  }

  // startBrowser() resolves to undefined when the launch fails; stop here
  // instead of letting the scraper crash on `undefined.newPage()`.
  if (browser == null) {
    console.log("Could not resolve the browser instance => ", "browser launch returned no instance");
    return;
  }

  try {
    await pageScraper.scraper(browser);
  } catch (err) {
    console.log("Scraping failed => ", err);
  }
}

module.exports = (browserInstance) => scrapeAll(browserInstance)

pageScraper.js

/**
 * Puppeteer scraper that repeatedly reads the TSLA quote from Yahoo Finance
 * and prints it to the console.
 *
 * This object runs inside the Node.js process that drives the browser. Node.js
 * has no `document`, so the price cannot be written into a web page from here;
 * to show it in a page, send it to that page (for example over HTTP or a
 * WebSocket) and update the DOM there.
 */
const scraper = {
    // Page that is opened and polled for the price.
    url: 'https://finance.yahoo.com/quote/TSLA/',
    // Absolute XPath of the <fin-streamer> element holding the current price.
    priceXPath: "/html/body/div[1]/div/div/div[1]/div/div[2]/div/div/div[5]/div/div/div/div[3]/div[1]/div[1]/fin-streamer[1]",
    // Number of price readings taken before the browser is closed.
    maxPolls: 1999,
    // Pause between two readings, in milliseconds.
    pollIntervalMs: 1000,

    /**
     * Opens `this.url` in a new page, logs the price `this.maxPolls` times
     * (one reading every `this.pollIntervalMs` ms) and then closes the browser.
     *
     * @param {object} browser - Puppeteer browser instance.
     * @returns {Promise<void>} Rejects with the underlying error if navigation
     *   or a reading fails; the browser is closed in either case.
     */
    async scraper(browser) {
        try {
            const page = await browser.newPage();

            await page.goto(this.url);
            console.log("page loaded");

            for (let poll = 0; poll < this.maxPolls; poll++) {
                // Re-query on every pass: the page may replace the node while streaming prices.
                const element = await page.waitForXPath(this.priceXPath);
                const price = await page.evaluate((node) => node.textContent, element);
                console.log(price);
                await page.waitForTimeout(this.pollIntervalMs);
            }
        } finally {
            // Always release the browser, even when a step above throws.
            await browser.close();
        }
    },
};

module.exports = scraper;

...and package.json

{
  "name": "wspuppeteer",
  "version": "1.0.0",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "babel": "^6.23.0",
    "puppeteer": "^13.5.1"
  },
  "devDependencies": {
    "@babel/core": "^7.17.8",
    "webpack": "^5.70.0"
  },
  "description": ""
}

What's causing the problem?

3
  • startBrowser() is async, it returns a Promise. You need to await it with let browserInstance = await browserObject.startBrowser(); Commented Mar 31, 2022 at 7:56
  • Puppeteer launches and drives a browser. You are trying to run it inside a browser, but it is meant to be run in Node.js. Commented Mar 31, 2022 at 7:57
  • IMHO Puppeteer is not the best option for fetching data (I'd rather use node-fetch and cheerio). In Puppeteer you can also wait some time after the page has loaded, so that you can be sure all the JS files have loaded. Commented Mar 31, 2022 at 8:12

1 Answer 1

1

pageScraper.js has this line:

module.exports = scraper;

It is a CommonJS module designed to be required from another module as part of a program running under Node.js. When you require it using Node.js, it will have a module variable defined in its scope (because that is how CommonJS modules work).

You have this:

<script src="pageScraper.js">

So you are trying to load it as a JavaScript script (not a module) in a browser.

Browsers do not support CommonJS modules. So they don't create the module variable, so when you try to read it to assign a new property to it, you get an exception.

Browsers also don't support the APIs that puppeteer requires (like being able to start a headless instance of Chromium in the background).


You can't run code designed for Node.js in a browser.

You need to discard index.html and run index.js using Node.js.

Sign up to request clarification or add additional context in comments.

2 Comments

Oh I didn't notice OP was trying to run the code in a browser, nice catch
thank you for taking your time to answer my question - it gives me some insight on what to look for next to make this happen.. :)

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.