I managed to do this in Python with Playwright, and a separate JavaScript file that Playwright can just read in and then run:
main.py
import os
from playwright.sync_api import sync_playwright
def get_storyboard_urls(video_id):
    """Return the storyboard URLs for a YouTube video.

    Reads ``storyboard_extractor.js`` from the directory containing this
    file, opens the video's watch page in headless Chromium via Playwright,
    and evaluates the script inside the page.

    Args:
        video_id: YouTube video ID; interpolated into the watch-page URL.

    Returns:
        The value the JavaScript evaluates to (expected to be a non-empty
        list of URL strings).

    Raises:
        ValueError: If the script returns an empty result (``None``, an
            empty string, or an empty list).
        Exception: Any other error raised while navigating or evaluating
            is printed and then re-raised unchanged.
    """
    # Load javascript
    script_path = os.path.join(os.path.dirname(__file__), 'storyboard_extractor.js')
    # Explicit encoding so the script is decoded the same way on every platform.
    with open(script_path, 'r', encoding='utf-8') as f:
        js_code = f.read()

    # Run playwright
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        try:
            page.goto(f"https://www.youtube.com/watch?v={video_id}")
            storyboard_urls = page.evaluate(js_code)
            # Truthiness check: an empty list means no URLs were extracted,
            # which is just as much a failure as None or "".
            if not storyboard_urls:
                raise ValueError(f"Failed to fetch storyboard URL for {video_id}\nValue returned: {storyboard_urls}")
            return storyboard_urls
        except Exception as e:
            print(f"Error fetching storyboard URL: {e}")
            # Bare raise re-raises the active exception with its original traceback.
            raise
        finally:
            # Always release the browser, whether or not extraction succeeded.
            browser.close()
# Example invocation; guarded so that importing this module does not
# launch a browser as a side effect.
if __name__ == "__main__":
    get_storyboard_urls("nr0RPVvKWDI")
storyboard_extractor.js - a bit more detail in this answer
// This script extracts the storyboard URLs from a YouTube video page.
// It is a single arrow-function expression that reads the page's `ytplayer`
// global and returns an array of URL strings.
() => {
  // Storyboard Get: the spec is a "|"-separated string on the player response.
  const resp = ytplayer.config.args.raw_player_response;
  const spec = resp.storyboards.playerStoryboardSpecRenderer.spec;
  const parts = spec.split("|");

  // parts[0] is used as the URL template: its `L$L/$N` placeholder is
  // replaced with `L<n>/M0` for the n-th entry.
  const baseFor = (n) => parts[0].replace('L$L/$N', `L${n}/M0`);

  // Keep only the parts that contain an "rs$" marker, take the text after
  // it, and re-prefix it with the URL-encoded form of "rs$" ("rs%24").
  const signs = parts
    .map((p) => p.split("rs$")[1])
    .filter((p) => !!p)
    .map((p) => "rs%24" + p);

  // Output: one URL per signature. Built with map so that no undeclared
  // (implicitly global) accumulator variable is created on the page.
  return signs.map((sign, i) => `${baseFor(i)}&sigh=${sign}`);
}