0

I have this with node 25.0.0 and Windows and puppeteer. Basically the get_card_search_results is called and it loads a page that features lazy loading. It will "scroll" the page down to ensure that the entire page loads.

This part works with no issues:

const puppeteer = require('puppeteer');
var browser;

var puppeteer_options = {
    headless: true, 
    ignoreHTTPSErrors: true, 
    args: ['--disable-dev-shm-usage', '--shm-size=4gb'],
    defaultViewport: {width: 1920, height: 1080},
    protocolTimeout: 1500000, //https://www.puppeteersharp.com/api/PuppeteerSharp.LaunchOptions.html
}

try
{
    browser = await puppeteer.launch(puppeteer_options);

    var page = await browser.newPage();
}
catch (err) {
    console.log(err);
}

async function get_card_search_results(options)
{
   await scroll_current_page({url:url, gp_args:gp_args})
}

async function scroll_current_page(options)
{
    await page.goto(url, {waitUntil: 'networkidle2'}); //, {waitUntil: 'networkidle2'}

    await page.setDefaultNavigationTimeout(0)

    cli_log({msg: 'starting scroll', log_name: log_name})

    //enable logging inside page.evaluate
        await page.exposeFunction('logInNodeJs', (value) => cli_log({msg: value, log_name: log_name}));

    //scroll down the page to get all html
        await page.evaluate(async ([gp_args]) => {
            var retval = true
            await new Promise((resolve) => {
                var retval = {}
                
                var el = document.documentElement

                logInNodeJs('scrollHeight: ' + el.scrollHeight)

                var cur_scroll_top = el.scrollTop
                logInNodeJs('cur_scroll_top start: ' + cur_scroll_top)
                var prev_scroll_top = cur_scroll_top
                logInNodeJs('prev_scroll_top start: ' + prev_scroll_top)

                scroll_page({
                    el: el, 
                    cur_scroll_top: cur_scroll_top, 
                    prev_scroll_top: prev_scroll_top,
                    safety: 0,
                })

                function scroll_page(options)
                {
                    var el = options.el
                    var cur_scroll_top = options.cur_scroll_top
                    var prev_scroll_top = options.prev_scroll_top
                    var safety = options.safety
                    
                    el.scrollTop += gp_args['scroll_step']
                    
                    
                    var cur_scroll_top = el.scrollTop
                    //ProtocolError: Runtime.callFunctionOn timed out. Increase the 'protocolTimeout' setting in launch/connect calls for a higher timeout if needed.
                    logInNodeJs('cur_scroll_top: ' + cur_scroll_top + ' previous: ' + prev_scroll_top)
                    
                    var max_scroll = false
                    if(gp_args['max_scroll'])
                    {
                        max_scroll = (cur_scroll_top >= gp_args['max_scroll'])
                    }
                    
                    if((cur_scroll_top == prev_scroll_top) || max_scroll)
                    {
                        logInNodeJs('end reached!')
                        resolve(true);
                    }
                    else
                    {
                        var prev_scroll_top = cur_scroll_top
                        logInNodeJs('prev_scroll_top: ' + prev_scroll_top)
                        
                        setTimeout(function(){scroll_page({
                            el: el, 
                            cur_scroll_top: cur_scroll_top, 
                            prev_scroll_top: prev_scroll_top,
                            safety: safety,
                        })}, 1375)

                    }
                }
                
                return true;
            })
            
            return retval
        }, [gp_args])
    
    return page
}

I get output like:

cl: starting scroll
cl: scrollHeight: 4907
cl: cur_scroll_top start: 0
cl: prev_scroll_top start: 0
cl: cur_scroll_top: 500 previous: 0
cl: prev_scroll_top: 500
cl: cur_scroll_top: 1000 previous: 500
cl: prev_scroll_top: 1000
cl: cur_scroll_top: 1500 previous: 1000
cl: prev_scroll_top: 1500
cl: cur_scroll_top: 2000 previous: 1500
cl: prev_scroll_top: 2000
cl: cur_scroll_top: 2500 previous: 2000
cl: prev_scroll_top: 2500
cl: cur_scroll_top: 3000 previous: 2500
cl: prev_scroll_top: 3000
cl: cur_scroll_top: 3500 previous: 3000
cl: prev_scroll_top: 3500
cl: cur_scroll_top: 3825 previous: 3500
cl: prev_scroll_top: 3825
cl: cur_scroll_top: 3825 previous: 3825
cl: end reached!
cl: scroll ended

However, if I modify the get_card_search_results function like this and add this second part to extract some information from the page:

async function get_card_search_results(options)
{
    await scroll_current_page({url:url, gp_args:gp_args})

    var total_pages = await page.evaluate(([gp_args]) => {
        var retval = false
        await new Promise((resolve) => {
            var el = document.querySelector('section[data-testid="paginationWrapper"] section article:nth-child(2)')
                
            var retval = el.childNodes.length
        })
    return retval

    }, [gp_args])
}

the entire script hangs at this point with this:

TimeoutError: Navigation timeout of 30000 ms exceeded
    at new Deferred (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\util\Deferred.js:60:34)
    at Deferred.create (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\util\Deferred.js:21:16)
    at new LifecycleWatcher (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\LifecycleWatcher.js:73:60)
    at CdpFrame.goto (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Frame.js:149:29)
    at CdpFrame.<anonymous> (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\util\decorators.js:109:27)
    at CdpPage.goto (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\api\Page.js:572:43)
    at scroll_current_page (c:\code\get_the_data\get_the_data.js:831:14)
    at get_card_search_results (c:\code\get_the_data\get_the_data.js:804:10)
    at c:\code\get_the_data\get_the_data.js:279:22

I haven't been able to figure out why adding this second page.evaluate causes it to hang.

3
  • Can you share a minimal reproducible example? Much of these funcs are undefined and there is no URL, so I can't run this to repro the issue. What info do you want to get on the page, exactly? Please provide all details! Thanks. Commented Nov 9 at 2:42
  • 1
    In var total_pages, you're not resolving the promise. I also think you'll be fine without Promise(). Commented Nov 9 at 11:03
  • @ninadepina Good point, but I don't think that matches OP's navigation error. Commented Nov 9 at 15:55

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.