So right now when I run this, I get a final output that includes two header columns. As a result it won't let me write this to a .csv either. How would I fix this so that it only includes the column from the first table? (seeing the rest of the column names are the same throughout)
import pandas as pd
import urllib.request
import bs4 as bs
urls = ['https://fantasysportsdaily.net/bsl/boxes/1-1.html',
'https://fantasysportsdaily.net/bsl/boxes/1-2.html'
]
final = []
for url in urls:
df = pd.read_html(url, header=0)
format1 = df[1].iloc[:, : 16]
colname1 = format1.columns[0]
format1.insert(1, 'Team', colname1)
format1.rename(columns = {list(format1)[0]: 'Player'}, inplace = True)
format2 = format1.drop(format1[format1.Player == 'TEAM TOTALS'].index)
team1 = format2.drop(format2[format2.Player == 'PERCENTAGES'].index)
format3 = df[2].iloc[:, : 16]
colname2 = format3.columns[0]
format3.insert(1, 'Team', colname2)
format3.rename(columns = {list(format3)[0]: 'Player'}, inplace = True)
format4 = format3.drop(format3[format3.Player == 'TEAM TOTALS'].index)
team2 = format4.drop(format4[format4.Player == 'PERCENTAGES'].index)
both_teams = [team1, team2]
combined = pd.concat(both_teams)
final.append(combined, ignore_index=True)
print(final)
##final.to_csv ('boxes.csv', index = True, header=True)