Below is a script I had some help with. I would like to alter it so that it produces two new columns, each holding one of three possible values. The desired output format is: Date | gamePK | Home | Home Rest | Away | Away Rest
The current matches.csv format is Date | gamePK | Home | Away
Home Rest and Away Rest should each be -1 if that team played the day before and its opponent did not, 1 if that team did not play the day before and its opponent did, and 0 otherwise.
Any information on how to create these columns and write the logic that fills them would be much appreciated.
import csv
import requests
import datetime
from pprint import pprint
import time
import pandas as pd
# Download the schedule entry for each game id and collect one
# [date, gamePk, home, away] row per game, then save the rows to matches.csv.
kp = []
for i in range(20001, 20070):
    # The game id is the fixed prefix '20180' followed by the running number.
    req = requests.get(
        'https://statsapi.web.nhl.com/api/v1/schedule?site=en_nhl&gamePk=20180' + str(i) + '&leaderGameTypes=R&expand=schedule.broadcasts.all,schedule.radioBroadcasts,schedule.teams,schedule.ticket,schedule.game.content.media.epg',
        timeout=30,  # do not hang forever on an unresponsive server
    )
    data = req.json()
    for item in data['dates']:
        date = item['date']
        for game in item['games']:
            gamePk = game['gamePk']
            teams = game['teams']
            home_tm = teams['home']['team']['abbreviation']
            away_tm = teams['away']['team']['abbreviation']
            print (date, gamePk, away_tm, home_tm)
            # Row order must match the column labels used below
            # (Date, gamePk, Home, Away): home team first, then away team.
            kp.append([date, gamePk, home_tm, away_tm])

pprint(kp)
df = pd.DataFrame(kp, columns=['Date', 'gamePk', 'Home', 'Away'])
df.to_csv('matches.csv', sep=',', header=True, index=False)

# NOTE(review): the pause runs once, after the CSV is written; it may have
# been intended as a per-request throttle inside the loop - confirm.
time.sleep(5)
def find_last(match_date, da, team):
    """Return how long `team` rested before its game on `match_date`.

    Args:
        match_date: Date of the game being evaluated, in a form accepted by
            ``pd.to_datetime``.
        da: DataFrame of earlier games with 'Date', 'Home' and 'Away'
            columns. The rows do not need to be sorted by date.
        team: Team identifier as it appears in the 'Home'/'Away' columns.

    Returns:
        The ``Timedelta`` between the team's most recent game in `da` and
        `match_date`, or ``None`` when the team has no game in `da`.
    """
    # Dates of every earlier game the team took part in, home or away.
    played = da.loc[(da['Home'] == team) | (da['Away'] == team), 'Date']
    if played.empty:
        print (team, "no_matches before this date")
        return None

    # Choose the latest game by its parsed date rather than by row position,
    # so the result does not depend on how `da` happens to be ordered.
    last_match = max(played, key=pd.to_datetime)

    # Subtract the last game date from "today's" date (match_date).
    duration_since_last = pd.to_datetime(match_date) - pd.to_datetime(last_match)
    print ("Team:", team)
    print ("Todays game date = ", match_date)
    print ("Last match played = ", last_match)
    print ("Rest Period = ", duration_since_last)
    print()
    return duration_since_last
# Reload the saved schedule and, for every game, report each side's rest
# period relative to the games dated strictly before it.
df = pd.read_csv('matches.csv', sep=',')

for k in df.index:
    home_team = df.at[k, 'Home']
    away_team = df.at[k, 'Away']
    match_date = df.at[k, 'Date']
    gamePk = df.at[k, 'gamePk']

    # Only games dated before this one count as "previous" games.
    earlier = df['Date'] < match_date
    da = df[earlier]

    for team in (home_team, away_team):
        print ("Record", k, home_team, 'vs', away_team)
        find_last(match_date, da, team)
    print ('________________________________________')