1

I am extracting google reviews of a resturant. I am interested in extracting reviewer name, rating given by reviewer, and text of the review. I used following code for the extraction:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time

driver = webdriver.Chrome('')
base_url = 'https://www.google.com/search?tbs=lf:1,lf_ui:9&tbm=lcl&sxsrf=AOaemvJFjYToqQmQGGnZUovsXC1CObNK1g:1633336974491&q=10+famous+restaurants+in+Dunedin&rflfq=1&num=10&sa=X&ved=2ahUKEwiTsqaxrrDzAhXe4zgGHZPODcoQjGp6BAgKEGo&biw=1280&bih=557&dpr=2#lrd=0xa82eac0dc8bdbb4b:0x4fc9070ad0f2ac70,1,,,&rlfi=hd:;si:5749134142351780976,l,CiAxMCBmYW1vdXMgcmVzdGF1cmFudHMgaW4gRHVuZWRpbiJDUjEvZ2VvL3R5cGUvZXN0YWJsaXNobWVudF9wb2kvcG9wdWxhcl93aXRoX3RvdXJpc3Rz2gENCgcI5Q8QChgFEgIIFkiDlJ7y7YCAgAhaMhAAEAEQAhgCGAQiIDEwIGZhbW91cyByZXN0YXVyYW50cyBpbiBkdW5lZGluKgQIAxACkgESaXRhbGlhbl9yZXN0YXVyYW50mgEkQ2hkRFNVaE5NRzluUzBWSlEwRm5TVU56ZW5WaFVsOUJSUkFCqgEMEAEqCCIEZm9vZCgA,y,2qOYUvKQ1C8;mv:[[-45.8349553,170.6616387],[-45.9156414,170.4803685]]'
driver.get(base_url)
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//div[./span[text()='Newest']]"))).click()

total_reviews_text =driver.find_element_by_xpath("//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
num_reviews = int (total_reviews_text.split()[0])

all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
time.sleep(2)
total_reviews = len(all_reviews)

 while total_reviews < num_reviews:
        driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
        WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
        #all_reviews = driver.find_elements_by_css_selector('div.gws-localreviews__google-review')
        time.sleep(5)
        all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
        print(total_reviews)
        total_reviews +=5

person_info = driver.find_elements_by_xpath("//div[@id='reviewSort']//div[contains(@class,'google-review')]")
rating_info = driver.find_elements_by_xpath("//div[@class='PuaHbe']")
review_text = driver.find_elements_by_xpath("//div[@class='Jtu6Td']")

for person in person_info:
    name = person.find_element_by_xpath("./div/div/div/a").text
    print(name)

for rating in rating_info:  
    rating_txt = person.find_element_by_xpath("./g-review-stars/span").get_attribute('aria-label')
    print(rating_txt)

for text in review_text:
    texts = text.find_element_by_xpath("./span").text
    print(texts)

The above code worked as per expectations. I want to make slight change in above code. Instead of using three loops to display name, rating, and review_text. I wanted to extract the same information using one loop. So I made following changes in the above code:

reviews_info = driver.find_elements_by_xpath("//div[@class='jxjCjc']")
for review_info in reviews_info:
    name = review_info.find_element_by_xpath("./div/div/a").text
    rating = review_info.find_element_by_xpath("//div[@class='PuaHbe']//g-review-stars//span").get_attribute('aria-label')
    text = review_info.find_element_by_xpath("//div[@class='Jtu6Td']//span").text
    print(name)
    print(rating)
    print(text)
    print()

The problem with a change in code is that it displays the same information (i.e. rating and text) for all reviewers names. I am not sure where am I making the mistake. Any help to fix the issue would be really appreciated.

2
  • 1
    Hi! Could you find a way to rephrase your problem with a minimal reproducible example? Some people might have very insightful answers for you, but they'll be put off by all the webdriver-related things so they won't answer. Could you find a way to rephrase the problem by focusing more on the issue (how to loop) and less on all the webdriver stuff? Commented Oct 11, 2021 at 8:13
  • 1
    You forgot current context . in rating and text xpath. Commented Oct 11, 2021 at 8:28

1 Answer 1

1
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome()
base_url = 'https://www.google.com/search?tbs=lf:1,lf_ui:9&tbm=lcl&sxsrf=AOaemvJFjYToqQmQGGnZUovsXC1CObNK1g:1633336974491&q=10+famous+restaurants+in+Dunedin&rflfq=1&num=10&sa=X&ved=2ahUKEwiTsqaxrrDzAhXe4zgGHZPODcoQjGp6BAgKEGo&biw=1280&bih=557&dpr=2#lrd=0xa82eac0dc8bdbb4b:0x4fc9070ad0f2ac70,1,,,&rlfi=hd:;si:5749134142351780976,l,CiAxMCBmYW1vdXMgcmVzdGF1cmFudHMgaW4gRHVuZWRpbiJDUjEvZ2VvL3R5cGUvZXN0YWJsaXNobWVudF9wb2kvcG9wdWxhcl93aXRoX3RvdXJpc3Rz2gENCgcI5Q8QChgFEgIIFkiDlJ7y7YCAgAhaMhAAEAEQAhgCGAQiIDEwIGZhbW91cyByZXN0YXVyYW50cyBpbiBkdW5lZGluKgQIAxACkgESaXRhbGlhbl9yZXN0YXVyYW50mgEkQ2hkRFNVaE5NRzluUzBWSlEwRm5TVU56ZW5WaFVsOUJSUkFCqgEMEAEqCCIEZm9vZCgA,y,2qOYUvKQ1C8;mv:[[-45.8349553,170.6616387],[-45.9156414,170.4803685]]'
driver.get(base_url)
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//div[./span[text()='Newest']]"))).click()

total_reviews_text =driver.find_element_by_xpath("//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
num_reviews = int (total_reviews_text.split()[0])
print("NUm reviews=", num_reviews)

all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
time.sleep(2)
total_reviews = len(all_reviews)
print("Total reviews=", total_reviews)
s = "(//div[@id='reviewSort']//div[contains(@class,'google-review')])[0]"
b = '0'
a = 1  # Index of Review button

for i in range(10):
    c = str(a)
    s = s.replace(b, c)  # Updating Xpath's index in every loop so that it can focus on new review everytime.
    b = str(a)
    a = a + 1
    WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
    time.sleep(5)
    all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
    total_reviews +=1
    Info = driver.find_element_by_xpath(s).text
    print(Info)
    print("<------------------------------------------------------>\n\n")

Output:- Click Here to See Program Output

Sign up to request clarification or add additional context in comments.

1 Comment

Add some description to your answer, as it's written it is difficult to read.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.