I want to delete all rows after the row containing the string "End of the 4th Quarter". Currently, this is row 474, but the row number will change depending on the game.
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
# Download one ESPN NBA play-by-play page, collect the text of every table
# cell row by row, and write the resulting table out as HTML.
url = "http://www.espn.com/nba/playbyplay?gameId=400900395"

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops early on an HTTP error instead of silently
# parsing an error page into an empty/garbage table.
r = requests.get(url, timeout=30)
r.raise_for_status()
data = r.text
soup = BeautifulSoup(data, "html.parser")

# Skip the first four <tr> elements of the document
# (presumably non-play header rows -- verify against the page markup).
data_rows = soup.find_all("tr")[4:]

# One inner list per <tr>, holding the text of each of its <td> cells.
play_data = [
    [td.get_text() for td in row.find_all("td")]
    for row in data_rows
]

# NOTE(review): every collected row is written as-is; nothing here trims the
# rows that follow the "End of the 4th Quarter" row.
df = pd.DataFrame(play_data)
df.to_html("pbp_data")
