1

Below is a script I had some help with. I would like to alter it to add 2 new columns, each holding one of 3 possible values. The desired format is: Date | gamePK | Home | Home Rest | Away | Away Rest

The current matches.csv format is Date | gamePK | Home | Away

Home Rest and Away Rest should each be: -1 if the team played the day prior and its opponent did not, 1 if the team did not play the day prior and its opponent did, and 0 otherwise.

Any information on how to create the columns and write this statement for them would be much appreciated.

import csv
import requests
import datetime
from pprint import pprint
import time
import pandas as pd

# Collect one [Date, gamePk, Home, Away] row per game returned by the NHL
# schedule endpoint, then persist the rows to matches.csv.
kp = []
for i in range(20001, 20070):
    req = requests.get('https://statsapi.web.nhl.com/api/v1/schedule?site=en_nhl&gamePk=20180' + str(i) + '&leaderGameTypes=R&expand=schedule.broadcasts.all,schedule.radioBroadcasts,schedule.teams,schedule.ticket,schedule.game.content.media.epg')
    data = req.json()

    for item in data['dates']:
        date = item['date']
        for game in item['games']:
            gamePk = game['gamePk']
            teams = game['teams']
            home_tm = teams['home']['team']['abbreviation']
            away_tm = teams['away']['team']['abbreviation']

            print (date, gamePk, away_tm, home_tm)

            # The row order has to match the column labels given to the
            # DataFrame below (Date, gamePk, Home, Away). Appending the away
            # team first would file every away team under "Home".
            kp.append([date, gamePk, home_tm, away_tm])

pprint(kp)
df = pd.DataFrame(kp, columns=['Date','gamePk','Home', 'Away'])
df.to_csv('matches.csv', sep=',', header=True, index=False)

time.sleep(5)


def find_last(match_date, da, team):
    """Print and return how long *team* has rested before *match_date*.

    Args:
        match_date: Date of the game being examined; anything
            ``pd.to_datetime`` accepts.
        da: DataFrame of candidate earlier games with ``Date``, ``Home`` and
            ``Away`` columns. The rows may be in any order.
        team: Team identifier to look for in ``Home`` or ``Away``.

    Returns:
        The difference between *match_date* and the team's most recent game
        in *da* (a pandas Timedelta), or ``None`` when the team has no game
        in *da*.
    """
    # Every game the team took part in, regardless of venue.
    team_games = da[(da['Home'] == team) | (da['Away'] == team)]

    if team_games.empty:
        print (team, "no_matches before this date")
        return None

    # Take the latest date explicitly rather than the last row, so the result
    # does not depend on the frame being sorted by date.
    last_match = team_games['Date'].max()

    # And then subtract this from "todays" date (match_date)
    duration_since_last = pd.to_datetime(match_date) - pd.to_datetime(last_match)
    print ("Team:", team)
    print ("Todays game date  = ", match_date)
    print ("Last match played = ", last_match)
    print ("Rest Period       = ", duration_since_last)

    print()

    return duration_since_last

# Reload the saved schedule from disk.
df = pd.read_csv('matches.csv', sep=',')

# Visit each game in turn and report how long each side has rested.
for k in df.index:

    home_team = df.at[k, 'Home']
    away_team = df.at[k, 'Away']
    match_date = df.at[k, 'Date']
    gamePk = df.at[k, 'gamePk']

    # Only games dated strictly before this one can count as a previous match.
    da = df.loc[df['Date'] < match_date]

    for team in (home_team, away_team):
        print ("Record", k, home_team, 'vs', away_team)

        find_last(match_date, da, team)

    print ('________________________________________')

1 Answer 1

1

The script you provided has been broken into separate sections for greater understanding. The following new sections are required to produce your desired addition to the DataFrame:

  1. On Game Day, What is the Previous Day
  2. Did We Play on the Previous Day
  3. Determine Game Day Handicap

Here's a jupyter notebook of the work: nhl_stats_parsing

Code:

import csv
import requests
import datetime
from pprint import pprint
import time
import pandas as pd
from pprint import pprint as pp
import json


# Raise the console display limits so the whole schedule prints untruncated.
# The fully-qualified names are used on purpose: a bare 'max_columns' or
# 'max_rows' pattern matches more than one option in recent pandas releases
# (display.* and styler.render.*) and raises OptionError.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 300)


# ### make request to NHL stats server for data and save it to a file

address_p1 = 'https://statsapi.web.nhl.com/api/v1/schedule?site=en_nhl&gamePk=20180'
address_p2 = '&leaderGameTypes=R&expand=schedule.broadcasts.all,schedule.radioBroadcasts,schedule.teams,schedule.ticket,schedule.game.content.media.epg'

# Download everything first and open the cache file only afterwards: opening
# data.json in 'w' mode truncates it immediately, so a request failing midway
# would otherwise wipe out a previously saved, still usable cache.
data_list = []

for i in range(20001, 20070):  # end 20070

    req = requests.get(address_p1 + str(i) + address_p2)
    req.raise_for_status()  # stop on an HTTP error instead of caching its body as if it were schedule data
    data = req.json()

    data_list.append(data)  # append each response to the data list; will be a list of dicts

with open('data.json', 'w') as outfile:
    json.dump(data_list, outfile)  # save the json file so you don't have to keep hitting the nhl server with your testing


# ### read the json file back in

with open('data.json') as cache_file:
    data = json.load(cache_file)


# ### preview the first saved record only

for record in data:
    pp(record)
    break


# ### parse each dict

# Flatten the saved responses into one [Date, gamePk, Home, Away] row per game.
kp = []
for json_dict in data:
    for item in json_dict['dates']:
        date = item['date']
        for game in item['games']:
            gamePk = game['gamePk']
            teams = game['teams']
            home_tm = teams['home']['team']['abbreviation']
            away_tm = teams['away']['team']['abbreviation']

            print (date, gamePk, away_tm, home_tm)

            # The row order has to match the column labels the DataFrame is
            # built with (Date, gamePk, Home, Away). Appending the away team
            # first would file every away team under "Home".
            kp.append([date, gamePk, home_tm, away_tm])


# ### create DataFrame and save to csv

df = pd.DataFrame(data=kp, columns=['Date', 'gamePk', 'Home', 'Away'])
df.to_csv('matches.csv', index=False, header=True, sep=',')


# ### load the csv that was just written back into a DataFrame

df = pd.read_csv('matches.csv', sep=',')

# Show the first 5 rows.
print(df.head())


## On Game Day, What is the Previous Day

def yesterday(date):
    """Return the calendar day before *date*.

    Both the argument and the result are strings in ``%Y-%m-%d`` form.
    """
    day_format = '%Y-%m-%d'
    game_day = datetime.datetime.strptime(date, day_format)
    day_before = game_day - datetime.timedelta(days=1)
    return day_before.strftime(day_format)


def yesterday_apply(df):
    """Add a ``previous_day`` column to *df*, computed row by row from ``Date``.

    The frame is modified in place; nothing is returned.
    """
    def _day_before_game(row):
        return yesterday(date=row['Date'])

    df['previous_day'] = df.apply(_day_before_game, axis=1)


# Add the previous_day column to df in place.
yesterday_apply(df)


## Did We Play on the Previous Day

def played_previous_day(df, date, team):
    """Report whether *team* has a game dated *date* in the schedule *df*.

    Args:
        df: DataFrame with ``Date``, ``Home`` and ``Away`` columns.
        date: Value compared for equality against the ``Date`` column.
        team: Value compared for equality against ``Home`` and ``Away``.

    Returns:
        ``True`` when at least one row has that date and lists the team as
        either the home or the away side, otherwise ``False``.
    """
    # Plain boolean masks are used instead of a string-built ``df.eval``
    # expression, so a date or team value containing a quote character cannot
    # break (or alter) the query.
    on_date = df['Date'] == date
    involves_team = (df['Home'] == team) | (df['Away'] == team)
    return bool((on_date & involves_team).any())


def played_previous_day_apply(df):
    """Add ``home_played_previous_day`` and ``away_played_previous_day`` to *df*.

    Each value is the result of ``played_previous_day`` for that row's
    ``previous_day`` and its ``Home`` / ``Away`` team. The frame is modified
    in place; nothing is returned.
    """
    def _home_side(row):
        return played_previous_day(df, date=row['previous_day'], team=row['Home'])

    def _away_side(row):
        return played_previous_day(df, date=row['previous_day'], team=row['Away'])

    df['home_played_previous_day'] = df.apply(_home_side, axis=1)
    df['away_played_previous_day'] = df.apply(_away_side, axis=1)


# Add the home_played_previous_day / away_played_previous_day columns to df in place.
played_previous_day_apply(df)


# # Determine Game Day Handicap

# Home Rest & Away Rest (-1 if the team played the day prior vs a team that didn't, 1 if the team didn't play the day prior vs an opponent who did, 0 otherwise)

def handicap(team, home, away):
    """Return the rest handicap for one side of a game.

    *team* selects the side being scored (``'home'`` or ``'away'``); *home*
    and *away* are truthy when that side played on the previous day.

    The result is 1 when the scored side did not play the previous day but
    its opponent did, -1 in the opposite situation, and 0 otherwise
    (including any other value of *team*).
    """
    if team not in ('home', 'away'):
        return 0

    # Rest edge seen from the home side: +1 if only the away side played the
    # day before, -1 if only the home side did, 0 if both or neither played.
    home_edge = int(bool(away)) - int(bool(home))

    return home_edge if team == 'home' else -home_edge


def handicap_apply(df):
    """Add ``home_rest`` and ``away_rest`` columns to *df*.

    Each value is ``handicap`` evaluated on that row's
    ``home_played_previous_day`` and ``away_played_previous_day`` flags. The
    frame is modified in place; nothing is returned.
    """
    def _rest_for(side):
        def _score(row):
            return handicap(team=side, home=row['home_played_previous_day'], away=row['away_played_previous_day'])
        return _score

    df['home_rest'] = df.apply(_rest_for('home'), axis=1)
    df['away_rest'] = df.apply(_rest_for('away'), axis=1)


# Add the home_rest / away_rest columns to df in place.
handicap_apply(df)


# Print the whole frame, including the helper columns created by the
# *_apply calls earlier in the script.
print(df)


# ### data presentation method

def find_last(match_date, da, team):
    """Print and return the rest period of *team* ahead of *match_date*.

    Args:
        match_date: Date of the game being examined; anything
            ``pd.to_datetime`` accepts.
        da: DataFrame of candidate earlier games with ``Date``, ``Home`` and
            ``Away`` columns. The rows may be in any order.
        team: Team identifier to look for in ``Home`` or ``Away``.

    Returns:
        The difference between *match_date* and the team's most recent game
        in *da* (a pandas Timedelta), or ``None`` when the team has no game
        in *da*.
    """
    # All games in which the team appears, home or away.
    appearances = da[(da['Home'] == team) | (da['Away'] == team)]

    if appearances.empty:
        print (team, "no_matches before this date")
        return None

    # Use the greatest date rather than the last row: taking the last row is
    # only correct when the frame happens to be sorted by date.
    last_match = appearances['Date'].max()

    # And then subtract this from "todays" date (match_date)
    duration_since_last = pd.to_datetime(match_date) - pd.to_datetime(last_match)
    print ("Team:", team)
    print ("Todays game date  = ", match_date)
    print ("Last match played = ", last_match)
    print ("Rest Period       = ", duration_since_last)

    print()

    return duration_since_last


# ### produce your output

# Visit each game in turn and report how long each side has rested.
for k in df.index:

    home_team = df.at[k, 'Home']
    away_team = df.at[k, 'Away']
    match_date = df.at[k, 'Date']
    gamePk = df.at[k, 'gamePk']

    # Only games dated strictly before this one can count as a previous match.
    da = df.loc[df['Date'] < match_date]

    for team in (home_team, away_team):
        print ("Record", k, home_team, 'vs', away_team)

        find_last(match_date, da, team)  # prints the rest report for this side

    print('_' * 40)

Output:

          Date      gamePk  Home    Away    previous_day    home_played_previous_day    away_played_previous_day    home_rest   away_rest
0   2018-10-03  2018020001  MTL TOR 2018-10-02  False   False   0   0
1   2018-10-03  2018020002  BOS WSH 2018-10-02  False   False   0   0
2   2018-10-03  2018020003  CGY VAN 2018-10-02  False   False   0   0
3   2018-10-03  2018020004  ANA SJS 2018-10-02  False   False   0   0
4   2018-10-04  2018020005  BOS BUF 2018-10-03  True    False   -1  1
5   2018-10-04  2018020006  NSH NYR 2018-10-03  False   False   0   0
6   2018-10-04  2018020007  WSH PIT 2018-10-03  True    False   -1  1
7   2018-10-04  2018020008  NYI CAR 2018-10-03  False   False   0   0
8   2018-10-04  2018020009  CHI OTT 2018-10-03  False   False   0   0
Sign up to request clarification or add additional context in comments.

1 Comment

Wow @Trenton_M, I have been working with this for most of the day and was nowhere near this end result. Thank you very much for going through this process and commenting all your steps!

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.