I'm trying to extract data from the internet. My code goes through the first loop smoothly, prints and loads data to the file but it won't print data for the next pages. Not I am using a python 3 notebook. Here is my python code.
import urllib3
from bs4 import BeautifulSoup as soup
from time import sleep
from random import randint
import pandas as pd
http = urllib3.PoolManager()
filename = "GautengForSale.csv"
f = open(filename, "w")
headers = "Description, Location, Price, Bedrooms, Bathrooms, Parking, FloorSize\n"
f.write(headers)
for page in range(1, 5):
url = 'https://www.property24.com/for-sale/gauteng/1/p'+str(page)+'?PropertyCategory=House%2cApartmentOrFlat%2cTownhouse'
page_html = http.request('GET', url)
page_soup = soup(page_html.data)
containers = page_soup.findAll("div", {"class": "p24_content"})
sleep(randint(2,10))
for container in containers:
description_container = container.findAll("div", {"class": "p24_description"})
if not description_container:
continue
else:
description = description_container[0].text
location_container = container.findAll("span", {"class": "p24_location"})
location = location_container[0].text
price_container = container.findAll("div", {"class": "p24_price"})
price = price_container[0].text.strip()
bedrooms_container = container.findAll("span", {"class": "p24_featureDetails", "title": "Bedrooms"})
if not bedrooms_container:
bedrooms = 0
else:
bedrooms = bedrooms_container[0].text.strip()
bathrooms_container = container.findAll("span", {"class": "p24_featureDetails", "title": "Bathrooms"})
if not bathrooms_container:
bathrooms = 1
else:
bathrooms = bathrooms_container[0].text.strip()
parking_container = container.findAll("span", {"class": "p24_featureDetails", "title": "Parking Spaces"})
if not parking_container:
parking = 0
else:
parking = parking_container[0].text.strip()
floor_size_container = container.findAll("span", {"class": "p24_size", "title": "Floor Size"})
if not floor_size_container:
floor_size = "n/a"
else:
floor_size = floor_size_container[0].text.strip()
print(str(description) + "," + str(location) + "," + str(price) + "," + str(bedrooms) + "," + str(bathrooms) + "," + str(parking) + "," + str(floor_size) + "\n")
f.write(str(description) + "," + str(location) + "," + str(price) + "," + str(bedrooms) + "," + str(bathrooms) + "," + str(parking) + "," + str(floor_size) + "\n")
f.close()
I'm not sure where I went wrong.