From 6ba177c859d7011adc15880bfe9f22afcc0d2263 Mon Sep 17 00:00:00 2001 From: Akshay Easwaran Date: Sat, 3 Feb 2024 19:40:57 -0500 Subject: [PATCH] fixing issues with bad time checks --- setup.py | 2 +- sportsdataverse/cfb/cfb_pbp.py | 135 ++++++++++++++++++++++++--------- tests/cfb/test_pbp.py | 32 +++++++- 3 files changed, 127 insertions(+), 42 deletions(-) diff --git a/setup.py b/setup.py index 321d8dd..500d057 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ # Versions should comply with PEP440. For a discussion on single-sourcing # the version across setup.py and the project code, see # https://packaging.python.org/en/latest/single_source_version.html - version="0.0.36.2.6", + version="0.0.36.2.7", description="Retrieve Sports data in Python", long_description=long_description, long_description_content_type="text/markdown", diff --git a/sportsdataverse/cfb/cfb_pbp.py b/sportsdataverse/cfb/cfb_pbp.py index 4fe6d28..0ca93c2 100755 --- a/sportsdataverse/cfb/cfb_pbp.py +++ b/sportsdataverse/cfb/cfb_pbp.py @@ -614,10 +614,30 @@ def play_text_dupe_checker(row): ].apply(lambda x: int(x)) pbp_txt["plays"]["end.TimeSecsRem"] = pbp_txt["plays"][ "start.TimeSecsRem" - ].shift(1) + ].shift(-1) + pbp_txt["plays"]["end.TimeSecsRem"] = np.select( + [ + pbp_txt["plays"]["end.TimeSecsRem"].isna() == True + ], + [ + 0 + ], + default = pbp_txt["plays"]["end.TimeSecsRem"] + ) + pbp_txt["plays"]["end.adj_TimeSecsRem"] = pbp_txt["plays"][ "start.adj_TimeSecsRem" - ].shift(1) + ].shift(-1) + pbp_txt["plays"]["end.adj_TimeSecsRem"] = np.select( + [ + pbp_txt["plays"]["end.adj_TimeSecsRem"].isna() == True + ], + [ + 0 + ], + default = pbp_txt["plays"]["end.adj_TimeSecsRem"] + ) + pbp_txt["plays"]["end.TimeSecsRem"] = np.where( (pbp_txt["plays"]["game_play_number"] == 1) | ( @@ -847,41 +867,35 @@ def __helper_cfb_pbp(self, pbp_txt): def __helper_cfb_pickcenter(self, pbp_txt): # # Spread definition - # if len(pbp_txt.get("pickcenter",[])) > 0: - # if len(pbp_txt.get("pickcenter", [])) > 1 and "spread" in pbp_txt.get("pickcenter", [])[1].keys(): - # homeFavorite = pbp_txt.get("pickcenter", [])[1].get("homeTeamOdds",{}).get("favorite", "") - # gameSpread = pbp_txt.get("pickcenter", [])[1].get("spread", "") - # overUnder = pbp_txt.get("pickcenter", [])[1].get("overUnder", "") - # gameSpreadAvailable = True - # elif "spread" in pbp_txt.get("pickcenter", [])[0].keys(): - # homeFavorite = pbp_txt.get("pickcenter", [])[0].get("homeTeamOdds",{}).get("favorite", "") - # gameSpread = pbp_txt.get("pickcenter", [])[0].get("spread", "") - # overUnder = pbp_txt.get("pickcenter", [])[0].get("overUnder", "") - # gameSpreadAvailable = True - # else: - # gameSpread = "" - # overUnder = "" - # homeFavorite = "" - # gameSpreadAvailable = False - - # # fix any type errors - # if homeFavorite == "": - # homeFavorite = True + + consensus = list(filter(lambda x: x["provider"]["name"] == "consensus" and "spread" in x.keys(), pbp_txt.get("pickcenter",[]))) + if (len(consensus) == 0): + consensus = pbp_txt.get("pickcenter",[]) + + if len(consensus) > 0: + homeFavorite = consensus[0].get("homeTeamOdds",{}).get("favorite", "") + gameSpread = consensus[0].get("spread", "") + overUnder = consensus[0].get("overUnder", "") + gameSpreadAvailable = (gameSpread != "") + + # fix any type errors + if homeFavorite == "": + homeFavorite = True - # if gameSpread == "": - # gameSpread = 2.5 - # gameSpreadAvailable = False - - # if overUnder == "": - # overUnder = 55.5 - # else: - # gameSpread = 2.5 - # overUnder = 55.5 - # homeFavorite = True - # gameSpreadAvailable = False - - # if gameSpreadAvailable: - # return gameSpread, overUnder, homeFavorite, gameSpreadAvailable + if gameSpread == "": + gameSpread = 2.5 + gameSpreadAvailable = False + + if overUnder == "": + overUnder = 55.5 + else: + gameSpread = 2.5 + overUnder = 55.5 + homeFavorite = True + gameSpreadAvailable = False + + if gameSpreadAvailable: + return gameSpread, overUnder, homeFavorite, gameSpreadAvailable # only use this if we still can't find the odds info from pickcenter return self.__helper__espn_cfb_odds_information__() @@ -4885,11 +4899,58 @@ def __process_wpa(self, play_df): play_df.lead_wp_before, (1 - play_df.lead_wp_before), (1 - play_df.lead_wp_before), - (1 - play_df.wp_after), + play_df.wp_after ], default=play_df.wp_after, ) + play_df["wp_after_case"] = np.select( + [ + (play_df["type.text"] == "Timeout"), + game_complete + & ( + (play_df.lead_play_type.isna()) + | (play_df.game_play_number == max(play_df.game_play_number)) + ) + & (play_df.pos_score_diff_end > 0), + game_complete + & ( + (play_df.lead_play_type.isna()) + | (play_df.game_play_number == max(play_df.game_play_number)) + ) + & (play_df.pos_score_diff_end < 0), + (play_df.end_of_half == 1) + & (play_df["start.pos_team.id"] == play_df.lead_pos_team) + & (play_df["type.text"] != "Timeout"), + (play_df.end_of_half == 1) + & (play_df["start.pos_team.id"] != play_df["end.pos_team.id"]) + & (play_df["type.text"] != "Timeout"), + (play_df.end_of_half == 1) + & (play_df["start.pos_team_receives_2H_kickoff"] == False) + & (play_df["type.text"] == "Timeout"), + (play_df.lead_play_type.isin(["End Period", "End of Half"])) + & (play_df.change_of_pos_team == 0), + (play_df.lead_play_type.isin(["End Period", "End of Half"])) + & (play_df.change_of_pos_team == 1), + (play_df["kickoff_onside"] == True) + & ((play_df["change_of_pos_team"] == True) | (play_df["change_of_poss"] == True)), # onside recovery + (play_df["start.pos_team.id"] != play_df["end.pos_team.id"]), + ], + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + ], + default=None, + ) + play_df["def_wp_after"] = 1 - play_df.wp_after play_df["home_wp_after"] = np.where( play_df["end.pos_team.id"] == play_df["homeTeamId"], diff --git a/tests/cfb/test_pbp.py b/tests/cfb/test_pbp.py index e535870..cbff8dc 100755 --- a/tests/cfb/test_pbp.py +++ b/tests/cfb/test_pbp.py @@ -2,6 +2,7 @@ import pandas as pd import pytest import logging +from sportsdataverse.cfb.model_vars import * LOGGER = logging.getLogger(__name__) logging.basicConfig() @@ -218,7 +219,6 @@ def test_onside_kickoff_recovery(): LOGGER.info(target_plays_gatech_15.iloc[0]["pos_score_diff_end"]) assert float(target_plays_gatech_15.iloc[0]["wp_after"]) > 0.9 assert float(target_plays_gatech_15.iloc[0]["wpa"]) < 0.1 - def test_play_order(): test = CFBPlayProcess(gameId = 401525825) @@ -314,9 +314,33 @@ def test_ou_tul_bad_spread(): test.espn_cfb_pbp() json_dict_stuff = test.run_processing_pipeline() - LOGGER.info(json_dict_stuff["pickcenter"]) + # LOGGER.info(json_dict_stuff["pickcenter"]) # assert len(json_dict_stuff["pickcenter"]) == 0 assert test.plays_json.loc[0, "gameSpreadAvailable"] == True - assert test.plays_json.loc[0, "homeTeamSpread"] == -31.5 - assert test.plays_json.loc[0, "homeTeamId"] == 201 \ No newline at end of file + assert test.plays_json.loc[0, "homeTeamSpread"] >= 31.0 + assert test.plays_json.loc[0, "homeTeamId"] == 201 + + +def test_osu_mich_bad_wp(): + test = CFBPlayProcess(gameId = 401520434) + test.espn_cfb_pbp() + json_dict_stuff = test.run_processing_pipeline() + + plays = test.plays_json + + plays["lead_play_text"] = plays["text"].shift(-1) + + bad_wpa_play = plays[ + plays["text"].isin([ + "Michigan Penalty, Unsportsmanlike Conduct (Jaylen Harrell) to the MICH 11 for a 1ST down", + "[NHSG] Kneel down by MCCARTHY, J.J. at MIC9 (team loss of 2), clock 00:00." + ]) + ] + + bad_wpa_play["proper_time_set"] = bad_wpa_play["start.adj_TimeSecsRem"] >= bad_wpa_play["end.adj_TimeSecsRem"] + + search_cols = sorted(list(set(wp_start_columns + wp_end_columns))) + LOGGER.info(bad_wpa_play[["id", "text", "lead_play_text", "change_of_poss", "change_of_pos_team", "wp_after_case", "wp_before", "wp_after", "proper_time_set"] + search_cols].to_json(orient = "records", indent = 2)) + + assert bad_wpa_play.proper_time_set.all() \ No newline at end of file