0

I am trying to scrape the first listing from the page at classified_link (see the code below). I have tried changing the sleep time and time_to_wait_between_checking. The first call to get_first_listing returns correctly, but the call inside the while loop fails. Why would the By.CSS_SELECTOR lookup succeed the first time the get_first_listing function is called and then fail the second time?

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import time
import os

# Browser configuration: run Chrome headless (no visible window).
chrome_options = Options()
chrome_options.add_argument('--headless')
# Drop Chrome's 'enable-logging' switch -- presumably to keep the console quiet.
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])

# Set before ChromeDriverManager() runs; presumably silences webdriver_manager's
# own log output -- TODO confirm against the installed webdriver_manager version.
os.environ['WDM_LOG_LEVEL'] = '0'
# Install (or reuse) a chromedriver binary and start one browser session that
# the rest of the script shares through the module-level name `driver`.
s = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=s, options=chrome_options)
# driver = webdriver.Chrome(s=path, options=chrome_options) # if you have problems with line 15

# Setting
# Search-results page that is polled for a new first listing.
classified_link = 'https://classifieds.ksl.com/search/Furniture'
# Pause between polls; passed to time.sleep(), so the unit is seconds.
time_to_wait_between_checking = 15


def get_first_listing():
    """Load the search page and return the first listing's link and title.

    Navigates the module-level ``driver`` to ``classified_link``, sleeps for a
    fixed 15 seconds, then looks up the same anchor element twice by CSS
    selector: once for its ``href`` attribute and once for its text.

    Returns:
        A ``(link, title)`` tuple.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If nothing matches
            the selector at the moment of lookup.
    """
    driver.get(classified_link)
    # Fixed pause instead of an explicit wait: the lookups below run after
    # 15 seconds whether or not the listing has actually been rendered.
    time.sleep(15)
    # Both lookups use the same selector once the adjacent string literals are
    # joined; it is a purely positional path (nth-child) through the results.
    link = driver.find_element(By.CSS_SELECTOR, '#search-results > div > section > div > div:nth-child(1) > '
                                                'section:nth-child(4) > div.listing-item-info > h2 >'
                                                ' div > a').get_attribute('href')
    title = driver.find_element(By.CSS_SELECTOR, '#search-results > div > section > div > div:nth-child(1) >'
                                                 ' section:nth-child(4) > div.listing-item-info > h2 > div > a').text
    return (link, title)


# Baseline: remember the first listing that is visible when the script starts.
listing_info = get_first_listing()
first_listing_link_temp = listing_info[0]
listing_title = listing_info[1]

print(f"First Listing Title: {listing_title}, Link: {first_listing_link_temp}")

# Poll: reload the page every `time_to_wait_between_checking` seconds and stop
# as soon as the first listing's link differs from the baseline link.
check_count = 0
active = True
while active:
    check_count += 1
    time.sleep(time_to_wait_between_checking)
    print(f"Checking to see if new listing, this is attempt number {check_count}")
    new_listing_info = get_first_listing()
    first_listing_link = new_listing_info[0]
    title = new_listing_info[1]
    # Only the links are compared; the title is used for the message only.
    if first_listing_link_temp != first_listing_link:
        print(f"There is a new ad. Title {title}, Link: {first_listing_link}")
        # The `break` leaves the loop immediately, so the cleared flag is
        # never re-tested by the `while` condition.
        active = False
        break

The output is:

Traceback (most recent call last):
  File "C:PATH.py", line 46, in <module>
    new_listing_info = get_first_listing()
  File "C:PATH.py", line 26, in get_first_listing
    link = driver.find_element(By.CSS_SELECTOR, '#search-results > div > section > div > div:nth-child(1) >'
  File "C:PATH\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 856, in find_element
    return self.execute(Command.FIND_ELEMENT, {
  File "C:PATH\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 429, in execute
    self.error_handler.check_response(response)
  File "C:PATH\anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 243, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"#search-results > div > section > div > div:nth-child(1) > section:nth-child(4) > div.listing-item-info > h2 > div > a"}
  (Session info: headless chrome=106.0.5249.119)
Stacktrace:
Backtrace:
...

Process finished with exit code 1
1
  • The website is blocked by a 'Press & Hold' captcha. Commented Oct 30, 2022 at 5:50

1 Answer 1

1

Several issues need to be fixed before your code will work:

  1. You have to close the cookies banner.
  2. You need to introduce WebDriverWait to wait for elements to be visible, clickable, etc.
  3. The locators need to be improved.
    The following code should work better:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time
import os

# Browser configuration: run Chrome headless (no visible window).
chrome_options = Options()
chrome_options.add_argument('--headless')
# Drop Chrome's 'enable-logging' switch -- presumably to keep the console quiet.
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])

# Set before ChromeDriverManager() runs; presumably silences webdriver_manager's
# own log output -- TODO confirm against the installed webdriver_manager version.
os.environ['WDM_LOG_LEVEL'] = '0'
# Install (or reuse) a chromedriver binary and start one browser session that
# the rest of the script shares through the module-level name `driver`.
s = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=s, options=chrome_options)
# driver = webdriver.Chrome(s=path, options=chrome_options) # if you have problems with line 15
# Shared explicit wait: each wait.until(...) call polls for up to 20 seconds
# before raising TimeoutException.
wait = WebDriverWait(driver, 20)

# Setting
# Search-results page that is polled for a new first listing.
classified_link = 'https://classifieds.ksl.com/search/Furniture'
# Pause between polls; passed to time.sleep(), so the unit is seconds.
time_to_wait_between_checking = 15


def get_first_listing():
    """Load the search page and return the first listing's link and title.

    Navigates the module-level ``driver`` to ``classified_link``, dismisses
    the cookie banner if it is shown, then waits (using the module-level
    ``wait``) for the first listing title link to become visible.

    Returns:
        A ``(link, title)`` tuple taken from the first element matching
        ``.item-info-title-link a``: its ``href`` attribute and its text.

    Raises:
        selenium.common.exceptions.TimeoutException: If no listing title link
            becomes visible within the timeout of ``wait``.
    """
    # Local import so this function does not rely on a new file-level import.
    from selenium.common.exceptions import TimeoutException

    driver.get(classified_link)

    # The cookie banner is normally shown only until it has been dismissed once
    # in this browser session.  Waiting for it unconditionally would make every
    # later call time out, so closing it is best-effort with a short wait.
    try:
        WebDriverWait(driver, 5).until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "#onetrust-close-btn-container button.onetrust-close-btn-handler")
            )
        ).click()
    except TimeoutException:
        pass  # No banner this time (already dismissed) -- nothing to close.

    # Look the element up once and read both values from the same element.
    title_element = wait.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, ".item-info-title-link a"))
    )
    link = title_element.get_attribute('href')
    title = title_element.text
    return (link, title)


# Poll the search page until its first listing changes, then report the new
# listing and stop.  Everything runs inside try/finally so the headless Chrome
# session is shut down however the script ends: a new listing was found, a
# Selenium call raised, or the otherwise endless poll was interrupted (Ctrl+C).
try:
    # Baseline: the first listing visible when the script starts.
    first_listing_link_temp, listing_title = get_first_listing()
    print(f"First Listing Title: {listing_title}, Link: {first_listing_link_temp}")

    check_count = 0
    while True:
        check_count += 1
        time.sleep(time_to_wait_between_checking)
        print(f"Checking to see if new listing, this is attempt number {check_count}")
        first_listing_link, title = get_first_listing()
        # Only the links are compared; the title is used for the message only.
        if first_listing_link_temp != first_listing_link:
            print(f"There is a new ad. Title {title}, Link: {first_listing_link}")
            break
finally:
    # Ends the WebDriver session and closes the browser it started.
    driver.quit()

Here I fixed only the get_first_listing() function; I did not go on to rework the while loop.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.