I have the following code, which uses Selenium to scrape this page (the list of albums, and the list of songs that appears when you click on an album). The script runs, but I would like to create a pandas DataFrame with one column containing the albums (one per row) and another column containing the list of songs.
I need it in order to use the data in Excel.
Thanks for everything,
Léa
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys
# Scrape the Genius "discographie rap 2021" page: print every album entry and,
# after clicking it, the track list shown for that album.

# Location of the chromedriver binary used to drive Chrome.
PATH = '/Users/prati/Desktop/WDD/Projet_Rapgenius/chromedriver'

# Choose the browser.
driver = webdriver.Chrome(PATH)
try:
    # Open the target web page.
    driver.get('https://genius.com/Genius-france-discographie-rap-2021-annotated')
    sleep(2)

    # Accept the cookie banner (Enter is sent to the button instead of a click).
    accept_button = driver.find_element_by_id('onetrust-accept-btn-handler')
    accept_button.send_keys(Keys.ENTER)
    sleep(2)

    # Find the highlighted fragments holding the album title/date/artist.
    links = driver.find_elements_by_class_name('ReferentFragmentVariantdesktop__Highlight-sc-1837hky-1.jShaMP')

    for link in links:
        try:
            # Scrape and display the album entry.
            album = link.text
            print(album)
            # Click it, then pause so the annotation panel can render.
            link.click()
            sleep(1)
            # Scrape and display the list of track titles for the album.
            div = driver.find_element_by_class_name('RichText__Container-oz284w-0.gVsQub')
            morceaux = div.find_elements_by_tag_name('li')
            for morceau in morceaux:
                print(morceau.text)
        except WebDriverException:
            # Best effort: an entry that cannot be clicked or has no track
            # list is skipped. Only Selenium errors are swallowed, so Ctrl-C
            # and genuine programming errors still surface.
            continue
finally:
    # Always end the session, even after a failure, so neither the browser
    # nor the chromedriver process is left running.
    driver.quit()