Node puppeteer scraping YouTube and encountering redirected you too many times

Question

I'm trying to scrape a YouTube playlist URL using Node / puppeteer. It was working, but now I'm getting an ERR_TOO_MANY_REDIRECTS error. I can still access the page using Chrome on my desktop.

I've tried using both the Chromium and Chrome browsers. I've also tried using the puppeteer-extra stealth plugin and the random-useragent package.

This is how my code stands at the moment:

// Start the locally installed Chrome in headed mode.
const browser = await puppeteer.launch({
  stealth: true,
  headless: false, // true
  executablePath: "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
  args: [
    '--disable-notifications', '--disable-features=site-per-process'
  ],
  defaultViewport: null,
});
const page = await browser.newPage();
await page.setUserAgent(random_useragent.getRandom());
// '<playlist-url>' is a placeholder for the real playlist address.
await page.goto('<playlist-url>', {
  waitUntil: 'networkidle2',
  timeout: 0, // 0 disables the navigation timeout
});

// Wait for the cookie-consent button before interacting with the page.
await page.waitForSelector('button[aria-label="Agree to the use of cookies and other data for the purposes described"]');

It bombs at the page.goto call. And it happens even if I try going to https://www.youtube.com.

Any suggestions as to what I should try next? I tried a proxy server but couldn't get it to work. I suspect I need a proxy to actually route through.

Interesting case. You're getting this error even when not running headless? Try not to use a random user-agent: it could pick a mobile one, and YouTube might then try to redirect you to the mobile version of the site. Also, what if you tried to use an existing account with cookies, localStorage, etc.? See about userDataDir here — Vaviloff
– Vaviloff, Commented Feb 5, 2022 at 7:50
As a side thought, scraping playlists might be easier using RSS: youtube.com/feeds/videos.xml?playlist_id=PLAYLIST_ID — Vaviloff
– Vaviloff, Commented Feb 5, 2022 at 7:57
Sorry for the delay. You put me onto the answer. I needed to get the actual playlist IDs for a given channel, so I used youtube.com/feeds/videos.xml?channel_id=<Channel ID> Nice one. Thanks for your help. If you post this as an answer I'll flag it accordingly. — Strontium_99
– Strontium_99, Commented Feb 8, 2022 at 10:26

Vaviloff · Accepted Answer · 2022-02-16 08:29:26Z

2

If all you need is playlist IDs for a given channel, it's possible to query a feed at:

https://youtube.com/feeds/videos.xml?channel_id=<Channel ID>

To get IDs of videos you can query a feed at:

https://youtube.com/feeds/videos.xml?playlist_id=PLAYLIST_ID

answered Feb 16, 2022 at 8:29

Vaviloff

16.9k6 gold badges55 silver badges64 bronze badges

Sign up to request clarification or add additional context in comments.

2 Comments

George Over a year ago

no longer the case?

Vaviloff Over a year ago

@George Just checked, still works, both for channel videos and playlists; but do note: the playlist must be public.

Mikhail Zub · Accepted Answer · 2022-08-09 13:01:43Z

You can get playlist (and Mix) links from YouTube as in the code example below (also check the full code in the online IDE):

const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");

puppeteer.use(StealthPlugin());

// Text typed into the YouTube search box.
const searchString = "java course";

// Shared request settings; `encodedQuery` is interpolated into `?search_query=`.
const requestParams = {
  baseURL: "https://www.youtube.com",
  // encodeURIComponent (not encodeURI) so that characters with a meaning inside a
  // query string (&, =, +, #, ?) are escaped instead of corrupting the URL.
  encodedQuery: encodeURIComponent(searchString),
};


/**
 * Collects Mixes and playlists from the search-results page currently loaded in `page`.
 *
 * The extraction callback is handed to `page.evaluate`, so every helper it needs is
 * declared inside that callback and `requestParams` is passed in as its argument.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)` (a Puppeteer page).
 * @returns {Promise<object[]>} Mixes first, then playlists. A link is `null` when its
 *   anchor or `href` is missing (previously the base URL with "undefined"/"null" appended).
 */
async function fillPlaylistsDataFromPage(page) {
  return page.evaluate(({ baseURL }) => {
    // Trimmed text of the first match, or undefined when nothing matches.
    const textOf = (root, selector) => root.querySelector(selector)?.textContent.trim();

    // Absolute URL for an anchor's href; null when the anchor or its href is absent.
    const absoluteLink = (anchor) => {
      const href = anchor?.getAttribute("href");
      return href ? `${baseURL}${href}` : null;
    };

    // Preview videos listed under a Mix or playlist entry.
    const childVideos = (root) =>
      Array.from(root.querySelectorAll("ytd-child-video-renderer a"), (anchor) => ({
        title: textOf(anchor, "#video-title"),
        link: absoluteLink(anchor),
        length: textOf(anchor, "#length"),
      }));

    const thumbnailOf = (root) => root.querySelector("a#thumbnail #img")?.getAttribute("src");

    const mixes = Array.from(document.querySelectorAll("#contents > ytd-radio-renderer"), (el) => ({
      title: textOf(el, "a > h3 > #video-title"),
      link: absoluteLink(el.querySelector("a#thumbnail")),
      videos: childVideos(el),
      thumbnail: thumbnailOf(el),
    }));

    const playlists = Array.from(document.querySelectorAll("#contents > ytd-playlist-renderer"), (el) => {
      const channelAnchor = el.querySelector("#channel-name a");
      return {
        title: textOf(el, "a > h3 > #video-title"),
        link: absoluteLink(el.querySelector("a#thumbnail")),
        channel: {
          name: channelAnchor?.textContent.trim(),
          link: absoluteLink(channelAnchor),
        },
        videoCount: textOf(el, "yt-formatted-string.ytd-thumbnail-overlay-side-panel-renderer"),
        videos: childVideos(el),
        thumbnail: thumbnailOf(el),
      };
    });

    return [...mixes, ...playlists];
  }, requestParams);
}


/**
 * Opens the YouTube search-results page for `requestParams.encodedQuery` and returns
 * the Mixes and playlists extracted by `fillPlaylistsDataFromPage`.
 *
 * @returns {Promise<object[]>} Whatever `fillPlaylistsDataFromPage` resolves to.
 * @throws Rethrows any navigation, wait or extraction error after closing the browser.
 */
async function getYoutubeSearchResults() {
  const browser = await puppeteer.launch({
    headless: false,
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
  });

  try {
    const page = await browser.newPage();
    const searchUrl = `${requestParams.baseURL}/results?search_query=${requestParams.encodedQuery}`;
    page.setDefaultNavigationTimeout(60000);
    await page.goto(searchUrl);
    // Proceed only once at least one video result has been rendered.
    await page.waitForSelector("#contents > ytd-video-renderer");
    return await fillPlaylistsDataFromPage(page);
  } finally {
    // Always release the browser, including when navigation or extraction fails.
    await browser.close();
  }
}

// Print the results; on failure log the error and signal it through the exit code.
getYoutubeSearchResults()
  .then(console.log)
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });

📌Note: to get the thumbnail you need to scroll the playlist into view (using the .scrollIntoView() method).

Output:

[
   {
      "title":"Java Complete Course | Placement Series",
      "link":"https://www.youtube.com/watch?v=yRpLlJmRo2w&list=PLfqMhTWNBTe3LtFWcvwpqTkUSlB32kJop",
      "channel":{
         "name":"Apna College",
         "link":"https://www.youtube.com/c/ApnaCollegeOfficial"
      },
      "videoCount":"35",
      "videos":[
         {
            "title":"Introduction to Java Language | Lecture 1 | Complete Placement Course",
            "link":"https://www.youtube.com/watch?v=yRpLlJmRo2w&list=PLfqMhTWNBTe3LtFWcvwpqTkUSlB32kJop",
            "length":"18:46"
         },
         {
            "title":"Variables in Java | Input Output | Complete Placement Course | Lecture 2",
            "link":"https://www.youtube.com/watch?v=LusTv0RlnSU&list=PLfqMhTWNBTe3LtFWcvwpqTkUSlB32kJop",
            "length":"42:36"
         }
      ],
      "thumbnail":null
   },
   {
      "title":"Java Tutorials For Beginners In Hindi",
      "link":"https://www.youtube.com/watch?v=ntLJmHOJ0ME&list=PLu0W_9lII9agS67Uits0UnJyrYiXhDS6q",
      "channel":{
         "name":"CodeWithHarry",
         "link":"https://www.youtube.com/c/CodeWithHarry"
      },
      "videoCount":"113",
      "videos":[
         {
            "title":"Introduction to Java + Installing Java JDK and IntelliJ IDEA for Java",
            "link":"https://www.youtube.com/watch?v=ntLJmHOJ0ME&list=PLu0W_9lII9agS67Uits0UnJyrYiXhDS6q",
            "length":"19:00"
         },
         {
            "title":"Basic Structure of a Java Program: Understanding our First Java Hello World Program",
            "link":"https://www.youtube.com/watch?v=zIdg7hkqNE0&list=PLu0W_9lII9agS67Uits0UnJyrYiXhDS6q",
            "length":"14:09"
         }
      ],
      "thumbnail":null
   }
]

You can read more about scraping YouTube playlists in the blog post Web scraping YouTube secondary search results with Nodejs.

Collectives™ on Stack Overflow

Node puppeteer scraping YouTube and encountering redirected you too many times

2 Answers 2

2 Comments

Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

2 Answers 2

2 Comments

Comments

Your Answer

Sign up or log in

Post as a guest

Related