I’m working on a Selenium-based Instagram scraper (for learning purposes) and I'm trying to extract the first 20 comments from a post.
However, Selenium always returns only 15 comments, even though the post contains hundreds.
Problem
I always get only ~15 comments, even though the post has hundreds. The issue seems to be the lookup of the “Load more comments” button (see the excerpt after the full code).
Whole Code:
import time

from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# ---------------- CONFIG ----------------
# Chrome profile directory handed to Chrome via `user-data-dir`.
# NOTE(review): this is a raw string, so each `\\` stays as two backslashes
# in the resulting path -- confirm that is intended.
SELENIUM_PROFILE_PATH = r"C:\\Users\\kogre\\AppData\\Local\\Google\\Chrome\\User Data\\SeleniumProfile"
# Profile page whose first post is opened further down.
ACCOUNT_URL = "https://www.instagram.com/therock/"

# ---------------- DRIVER ----------------
options = webdriver.ChromeOptions()
for chrome_flag in (f"user-data-dir={SELENIUM_PROFILE_PATH}", "--start-maximized"):
    options.add_argument(chrome_flag)

# webdriver_manager resolves the chromedriver binary path for the Service.
chromedriver_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chromedriver_service, options=options)

# Shared explicit wait (20 s timeout) used by the steps that follow.
wait = WebDriverWait(driver, 20)
# ---------------- OPEN ACCOUNT ----------------
driver.get(ACCOUNT_URL)
time.sleep(3)  # fixed pause after navigation

# ---------------- OPEN FIRST POST ----------------
# First anchor in document order whose href points at a post or a reel.
first_post_locator = (
    By.XPATH,
    "(//a[contains(@href,'/p/') or contains(@href,'/reel/')])[1]",
)
first_post = wait.until(EC.element_to_be_clickable(first_post_locator))
first_post.click()
time.sleep(2)  # fixed pause after the click
# ---------------- CLICK Load more comments ----------------
# The previous XPath used an unprefixed `title` name test.  When the <title>
# node sits inside an inline <svg> it is in the SVG namespace, and an
# unprefixed name test in an HTML document does not match it, so the lookup
# always timed out.  `local-name()` ignores the namespace.
# NOTE(review): Instagram's markup changes often -- confirm in DevTools that
# the control still exposes "Load more comments" as an svg title/aria-label
# and that it is a <button> or an element with role="button".
LOAD_MORE_XPATH = (
    "//*[self::button or @role='button']"
    "[.//*[local-name()='svg'][@aria-label='Load more comments']"
    " or .//*[local-name()='title'][normalize-space(.)='Load more comments']]"
)
COMMENT_ITEM_SELECTOR = "li._a9zj"
COMMENTS_NEEDED = 20 + 1  # 20 comments plus the leading caption item
MAX_LOAD_MORE_CLICKS = 50  # hard stop so the loop can never spin forever

# Dedicated short wait: the final, expected miss should not cost the full
# 20 s of the shared `wait`.
load_more_wait = WebDriverWait(driver, 5)

for _ in range(MAX_LOAD_MORE_CLICKS):
    loaded = len(driver.find_elements(By.CSS_SELECTOR, COMMENT_ITEM_SELECTOR))
    if loaded >= COMMENTS_NEEDED:
        break  # enough comments are already in the DOM

    try:
        load_more_btn = load_more_wait.until(
            EC.element_to_be_clickable((By.XPATH, LOAD_MORE_XPATH))
        )
    except TimeoutException:
        break  # No more button found

    try:
        # Click through JavaScript rather than WebElement.click().
        driver.execute_script("arguments[0].click();", load_more_btn)
    except StaleElementReferenceException:
        continue  # the button was re-rendered; look it up again

    time.sleep(1)  # give the next batch of comments time to render
# ---------------- GET COMMENTS ----------------
MAX_COMMENTS = 20
username_selectors = ["h3 a", "h2 a", "a.x1i10hfl", "a[role='link']"]
# Element lookups that fail for either reason are treated as "not found",
# keeping the extraction best-effort without a bare `except:`.
LOOKUP_ERRORS = (NoSuchElementException, StaleElementReferenceException)


def _first_nonempty_text(block, selectors):
    """Return the stripped text of the first selector hit that has text.

    Each CSS selector is tried in order beneath *block*.  Returns ``None``
    when no selector yields an element with non-empty text.
    """
    for sel in selectors:
        try:
            text = block.find_element(By.CSS_SELECTOR, sel).text.strip()
        except LOOKUP_ERRORS:
            continue
        if text:
            return text
    return None


def _extract_comment(block):
    """Return ``(username, text)`` for a single comment list item.

    The username falls back to the first ``<a>`` under *block*, then to
    ``"UNKNOWN"``; the comment text falls back to an empty string.
    """
    username = _first_nonempty_text(block, username_selectors)
    if not username:
        try:
            username = block.find_element(By.TAG_NAME, "a").text.strip()
        except LOOKUP_ERRORS:
            username = "UNKNOWN"
    try:
        text = block.find_element(By.CSS_SELECTOR, "span._ap3a").text.strip()
    except LOOKUP_ERRORS:
        text = ""
    return username, text


try:
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "li._a9zj")))
    comment_items = driver.find_elements(By.CSS_SELECTOR, "li._a9zj")
    real_comments = comment_items[1:]  # remove caption
    results = [_extract_comment(block) for block in real_comments[:MAX_COMMENTS]]
    print(results)
finally:
    # Close the browser even when the wait above times out or a lookup
    # raises something unexpected; previously quit() was skipped on error.
    driver.quit()
I think the issue is in this chunk of code:
# Excerpt of the full script: wait for the "Load more comments" button,
# click it through JavaScript, then read the comment list items.
# open post...
# NOTE(review): if the <title> node lives inside an inline <svg>, the
# unprefixed `title` name test below will likely not match it (SVG
# namespace); matching with *[local-name()='title'] is worth trying.
# NOTE(review): the control may not be a <button> element at all -- confirm
# the tag/role in DevTools.
load_more_btn = wait.until(
EC.element_to_be_clickable(
(By.XPATH, "//button[.//title[normalize-space(text())='Load more comments']]")
)
)
# Clicks via injected JavaScript instead of WebElement.click().
driver.execute_script("arguments[0].click();", load_more_btn)
# then get comments: driver.find_elements(By.CSS_SELECTOR, "li._a9zj")
What I expect
A way to load more comments so Selenium can collect 20+ comments.
What I suspect
Selenium can't find the “Load more comments” button, probably because my selector (an XPath expression, not a CSS selector) is wrong.
Question
What is the correct CSS/XPath selector for the “Load more comments” element in the current Instagram web UI? Or has Instagram replaced the button with a scroll-to-load system?
Any updated working selector or approach would help. Thanks!