I have this with node 25.0.0 and Windows and puppeteer. Basically the get_card_search_results is called and it loads a page that features lazy loading. It will "scroll" the page down to ensure that the entire page loads.
This part works with no issues:
const puppeteer = require('puppeteer');
var browser;
var puppeteer_options = {
headless: true,
ignoreHTTPSErrors: true,
args: ['--disable-dev-shm-usage', '--shm-size=4gb'],
defaultViewport: {width: 1920, height: 1080},
protocolTimeout: 1500000, //https://www.puppeteersharp.com/api/PuppeteerSharp.LaunchOptions.html
}
try
{
browser = await puppeteer.launch(puppeteer_options);
var page = await browser.newPage();
}
catch (err) {
console.log(err);
}
async function get_card_search_results(options)
{
await scroll_current_page({url:url, gp_args:gp_args})
}
async function scroll_current_page(options)
{
await page.goto(url, {waitUntil: 'networkidle2'}); //, {waitUntil: 'networkidle2'}
await page.setDefaultNavigationTimeout(0)
cli_log({msg: 'starting scroll', log_name: log_name})
//enable logging inside page.evaluate
await page.exposeFunction('logInNodeJs', (value) => cli_log({msg: value, log_name: log_name}));
//scroll down the page to get all html
await page.evaluate(async ([gp_args]) => {
var retval = true
await new Promise((resolve) => {
var retval = {}
var el = document.documentElement
logInNodeJs('scrollHeight: ' + el.scrollHeight)
var cur_scroll_top = el.scrollTop
logInNodeJs('cur_scroll_top start: ' + cur_scroll_top)
var prev_scroll_top = cur_scroll_top
logInNodeJs('prev_scroll_top start: ' + prev_scroll_top)
scroll_page({
el: el,
cur_scroll_top: cur_scroll_top,
prev_scroll_top: prev_scroll_top,
safety: 0,
})
function scroll_page(options)
{
var el = options.el
var cur_scroll_top = options.cur_scroll_top
var prev_scroll_top = options.prev_scroll_top
var safety = options.safety
el.scrollTop += gp_args['scroll_step']
var cur_scroll_top = el.scrollTop
//ProtocolError: Runtime.callFunctionOn timed out. Increase the 'protocolTimeout' setting in launch/connect calls for a higher timeout if needed.
logInNodeJs('cur_scroll_top: ' + cur_scroll_top + ' previous: ' + prev_scroll_top)
var max_scroll = false
if(gp_args['max_scroll'])
{
max_scroll = (cur_scroll_top >= gp_args['max_scroll'])
}
if((cur_scroll_top == prev_scroll_top) || max_scroll)
{
logInNodeJs('end reached!')
resolve(true);
}
else
{
var prev_scroll_top = cur_scroll_top
logInNodeJs('prev_scroll_top: ' + prev_scroll_top)
setTimeout(function(){scroll_page({
el: el,
cur_scroll_top: cur_scroll_top,
prev_scroll_top: prev_scroll_top,
safety: safety,
})}, 1375)
}
}
return true;
})
return retval
}, [gp_args])
return page
}
I get output like:
cl: starting scroll
cl: scrollHeight: 4907
cl: cur_scroll_top start: 0
cl: prev_scroll_top start: 0
cl: cur_scroll_top: 500 previous: 0
cl: prev_scroll_top: 500
cl: cur_scroll_top: 1000 previous: 500
cl: prev_scroll_top: 1000
cl: cur_scroll_top: 1500 previous: 1000
cl: prev_scroll_top: 1500
cl: cur_scroll_top: 2000 previous: 1500
cl: prev_scroll_top: 2000
cl: cur_scroll_top: 2500 previous: 2000
cl: prev_scroll_top: 2500
cl: cur_scroll_top: 3000 previous: 2500
cl: prev_scroll_top: 3000
cl: cur_scroll_top: 3500 previous: 3000
cl: prev_scroll_top: 3500
cl: cur_scroll_top: 3825 previous: 3500
cl: prev_scroll_top: 3825
cl: cur_scroll_top: 3825 previous: 3825
cl: end reached!
cl: scroll ended
However, if I modify the get_card_search_results function like this and add this second part to extract some information from the page:
async function get_card_search_results(options)
{
await scroll_current_page({url:url, gp_args:gp_args})
var total_pages = await page.evaluate(([gp_args]) => {
var retval = false
await new Promise((resolve) => {
var el = document.querySelector('section[data-testid="paginationWrapper"] section article:nth-child(2)')
var retval = el.childNodes.length
})
return retval
}, [gp_args])
}
the entire script hangs at this point with this:
TimeoutError: Navigation timeout of 30000 ms exceeded
at new Deferred (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\util\Deferred.js:60:34)
at Deferred.create (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\util\Deferred.js:21:16)
at new LifecycleWatcher (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\LifecycleWatcher.js:73:60)
at CdpFrame.goto (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Frame.js:149:29)
at CdpFrame.<anonymous> (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\util\decorators.js:109:27)
at CdpPage.goto (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\api\Page.js:572:43)
at scroll_current_page (c:\code\get_the_data\get_the_data.js:831:14)
at get_card_search_results (c:\code\get_the_data\get_the_data.js:804:10)
at c:\code\get_the_data\get_the_data.js:279:22
I haven't been able to figure out why adding this second page.evaluate causes it to hang.
var total_pages, you're not resolving the promise. I also think you'll be fine withoutPromise().