I am now building a web-scraping program with Python 3.5 and bs4. In the code below I tried to retrieve the data from two tables in the url. I succeed in the first table, but error pops out for the second one. The error is "IndexError: list index out of range" for "D.append(cells[0].find(text=True))". I have checked the list indices for "cells', which gives me 0,1,2, so should be no problem. Could anyone suggest any ideas on solving this issue?
import tkinter as tk
def test():
from bs4 import BeautifulSoup
import urllib.request
import pandas as pd
url_text = 'http://www.sce.hkbu.edu.hk/future-students/part-time/short-courses-regular.php?code=EGE1201'
resp = urllib.request.urlopen(url_text)
soup = BeautifulSoup(resp, from_encoding=resp.info().get_param('charset'))
all_tables=soup.find_all('table')
print (all_tables)
right_table=soup.find('table', {'class' : 'info'})
A=[]
B=[]
C=[]
for row in right_table.findAll("tr"):
cells = row.findAll('td')
A.append(cells[0].find(text=True))
B.append(cells[1].find(text=True))
C.append(cells[2].find(text=True))
df=pd.DataFrame()
df[""]=A
df["EGE1201"]=C
print(df)
D=[]
E=[]
F=[]
right_table=soup.find('table', {'class' : 'schedule'})
for row in right_table.findAll("tr"):
try:
cells = row.findAll('th')
except:
cells = row.findAll('td')
D.append(cells[0].find(text=True))
E.append(cells[1].find(text=True))
F.append(cells[2].find(text=True))
df1=pd.DataFrame()
df[D[0]]=D[1]
df[E[0]]=E[1]
df[F[0]]=F[1]
print(df1)
if __name__ == '__main__':
test()