I am currently trying to extract some data from an event webpage, following a tutorial as I've never done this or used Python for this before. It involves extracting the name, date and location of listed events. It seems to either be extracting or outputting the data twice, but I can't see any line of code that would be doing it. Any help would be appreciated!
from time import sleep
from time import time
from random import randint
from bs4 import BeautifulSoup
from requests import get
import pandas
#loop through individual webpages
pages = [str(i) for i in range(1,3)]
url = 'https://www.eventbrite.com/d/malaysia--kuala-lumpur--85675181/all-events/?page=' + str(pages)
name = []
date = []
location = []
start_time = time()
requests = 0
for page in pages:
response = get(url)
sleep(randint(1,3))
requests += 1
elapsed_time = time() - start_time
print('Request: {}; Frequency: {} requests/s'.format(requests, requests/elapsed_time))
if response.status_code != 200:
warn('Request: {}; Status Code: {}'.format(requests, response.status_code))
html_soup = BeautifulSoup(response.text, 'html.parser')
#main div
event_containers = html_soup.find_all('div', class_ = 'eds-media-card-content__content__principal')
for container in event_containers:
#get event name
event_name = container.h3.div.div.text
name.append(event_name)
#get event day & date
event_date = container.div.div.text
date.append(event_date)
#get event location
event_location = container.find('div', class_ = 'card-text--truncated__one')
location.append(event_location)
event_list = pandas.DataFrame({
'event': name,
'date': date,
'location': location
})
print(event_list)
