Skip to content

Commit

Permalink
fixing issues with bad time checks
Browse files Browse the repository at this point in the history
  • Loading branch information
akeaswaran committed Feb 4, 2024
1 parent 9e27d23 commit 6ba177c
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 42 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version="0.0.36.2.6",
version="0.0.36.2.7",
description="Retrieve Sports data in Python",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
135 changes: 98 additions & 37 deletions sportsdataverse/cfb/cfb_pbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,10 +614,30 @@ def play_text_dupe_checker(row):
].apply(lambda x: int(x))
pbp_txt["plays"]["end.TimeSecsRem"] = pbp_txt["plays"][
"start.TimeSecsRem"
].shift(1)
].shift(-1)
pbp_txt["plays"]["end.TimeSecsRem"] = np.select(
[
pbp_txt["plays"]["end.TimeSecsRem"].isna() == True
],
[
0
],
default = pbp_txt["plays"]["end.TimeSecsRem"]
)

pbp_txt["plays"]["end.adj_TimeSecsRem"] = pbp_txt["plays"][
"start.adj_TimeSecsRem"
].shift(1)
].shift(-1)
pbp_txt["plays"]["end.adj_TimeSecsRem"] = np.select(
[
pbp_txt["plays"]["end.adj_TimeSecsRem"].isna() == True
],
[
0
],
default = pbp_txt["plays"]["end.adj_TimeSecsRem"]
)

pbp_txt["plays"]["end.TimeSecsRem"] = np.where(
(pbp_txt["plays"]["game_play_number"] == 1)
| (
Expand Down Expand Up @@ -847,41 +867,35 @@ def __helper_cfb_pbp(self, pbp_txt):

def __helper_cfb_pickcenter(self, pbp_txt):
    """Pull the game spread, over/under and home-favorite flag from pickcenter.

    The first entry whose provider is named "consensus" and that carries a
    ``"spread"`` key is preferred. When no such entry exists, the first
    pickcenter entry of any provider is used instead.

    Args:
        pbp_txt: Game dict. Its optional ``"pickcenter"`` key holds a list of
            odds dicts, each read for ``"provider"``, ``"spread"``,
            ``"overUnder"`` and ``"homeTeamOdds"``.

    Returns:
        ``(gameSpread, overUnder, homeFavorite, gameSpreadAvailable)`` with
        ``gameSpreadAvailable`` set to ``True`` when the chosen entry has a
        usable spread. A missing over/under defaults to ``55.5`` and a
        missing favorite flag defaults to ``True``. When no usable spread is
        found, the result of
        ``self.__helper__espn_cfb_odds_information__()`` is returned instead.
    """
    # ``or []`` also covers an explicit ``None`` stored under the key.
    lines = pbp_txt.get("pickcenter") or []

    def _is_consensus_with_spread(line):
        # Entries without a provider block must not raise; they simply are
        # not the consensus line.
        provider = line.get("provider") or {}
        return provider.get("name") == "consensus" and "spread" in line

    candidates = [line for line in lines if _is_consensus_with_spread(line)] or lines

    if candidates:
        chosen = candidates[0]
        gameSpread = chosen.get("spread", "")

        # A spread of 0 is a real line, so test for "missing" explicitly
        # rather than by truthiness.
        if gameSpread not in ("", None):
            overUnder = chosen.get("overUnder", "")
            if overUnder in ("", None):
                overUnder = 55.5

            homeFavorite = (chosen.get("homeTeamOdds") or {}).get("favorite", "")
            # fix any type errors
            if homeFavorite in ("", None):
                homeFavorite = True

            return gameSpread, overUnder, homeFavorite, True

    # only use this if we still can't find the odds info from pickcenter
    return self.__helper__espn_cfb_odds_information__()
Expand Down Expand Up @@ -4885,11 +4899,58 @@ def __process_wpa(self, play_df):
play_df.lead_wp_before,
(1 - play_df.lead_wp_before),
(1 - play_df.lead_wp_before),
(1 - play_df.wp_after),
play_df.wp_after
],
default=play_df.wp_after,
)

play_df["wp_after_case"] = np.select(
[
(play_df["type.text"] == "Timeout"),
game_complete
& (
(play_df.lead_play_type.isna())
| (play_df.game_play_number == max(play_df.game_play_number))
)
& (play_df.pos_score_diff_end > 0),
game_complete
& (
(play_df.lead_play_type.isna())
| (play_df.game_play_number == max(play_df.game_play_number))
)
& (play_df.pos_score_diff_end < 0),
(play_df.end_of_half == 1)
& (play_df["start.pos_team.id"] == play_df.lead_pos_team)
& (play_df["type.text"] != "Timeout"),
(play_df.end_of_half == 1)
& (play_df["start.pos_team.id"] != play_df["end.pos_team.id"])
& (play_df["type.text"] != "Timeout"),
(play_df.end_of_half == 1)
& (play_df["start.pos_team_receives_2H_kickoff"] == False)
& (play_df["type.text"] == "Timeout"),
(play_df.lead_play_type.isin(["End Period", "End of Half"]))
& (play_df.change_of_pos_team == 0),
(play_df.lead_play_type.isin(["End Period", "End of Half"]))
& (play_df.change_of_pos_team == 1),
(play_df["kickoff_onside"] == True)
& ((play_df["change_of_pos_team"] == True) | (play_df["change_of_poss"] == True)), # onside recovery
(play_df["start.pos_team.id"] != play_df["end.pos_team.id"]),
],
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
],
default=None,
)

play_df["def_wp_after"] = 1 - play_df.wp_after
play_df["home_wp_after"] = np.where(
play_df["end.pos_team.id"] == play_df["homeTeamId"],
Expand Down
32 changes: 28 additions & 4 deletions tests/cfb/test_pbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
import pytest
import logging
from sportsdataverse.cfb.model_vars import *

LOGGER = logging.getLogger(__name__)
logging.basicConfig()
Expand Down Expand Up @@ -218,7 +219,6 @@ def test_onside_kickoff_recovery():
LOGGER.info(target_plays_gatech_15.iloc[0]["pos_score_diff_end"])
assert float(target_plays_gatech_15.iloc[0]["wp_after"]) > 0.9
assert float(target_plays_gatech_15.iloc[0]["wpa"]) < 0.1


def test_play_order():
test = CFBPlayProcess(gameId = 401525825)
Expand Down Expand Up @@ -314,9 +314,33 @@ def test_ou_tul_bad_spread():
test.espn_cfb_pbp()
json_dict_stuff = test.run_processing_pipeline()

LOGGER.info(json_dict_stuff["pickcenter"])
# LOGGER.info(json_dict_stuff["pickcenter"])

# assert len(json_dict_stuff["pickcenter"]) == 0
assert test.plays_json.loc[0, "gameSpreadAvailable"] == True
assert test.plays_json.loc[0, "homeTeamSpread"] == -31.5
assert test.plays_json.loc[0, "homeTeamId"] == 201
assert test.plays_json.loc[0, "homeTeamSpread"] >= 31.0
assert test.plays_json.loc[0, "homeTeamId"] == 201


def test_osu_mich_bad_wp():
    """Check that the clock does not run backwards on two plays of game 401520434.

    Runs the processing pipeline for the game, selects the two plays by their
    exact play text, and asserts that each play's
    ``start.adj_TimeSecsRem`` is at least its ``end.adj_TimeSecsRem``.
    The selected rows, with the win-probability model columns, are logged to
    help debugging when the assertion fails.
    """
    test = CFBPlayProcess(gameId = 401520434)
    test.espn_cfb_pbp()
    test.run_processing_pipeline()

    # ``assign`` returns a new frame, so the processed plays held by ``test``
    # are left untouched.
    plays = test.plays_json.assign(lead_play_text=lambda df: df["text"].shift(-1))

    target_texts = [
        "Michigan Penalty, Unsportsmanlike Conduct (Jaylen Harrell) to the MICH 11 for a 1ST down",
        "[NHSG] Kneel down by MCCARTHY, J.J. at MIC9 (team loss of 2), clock 00:00.",
    ]
    # Explicit copy: a column is added below, and writing to a filtered slice
    # triggers pandas' SettingWithCopy warning.
    bad_wpa_play = plays[plays["text"].isin(target_texts)].copy()

    # ``Series.all()`` is True for an empty selection; without this guard the
    # test would pass vacuously if neither play text were found.
    assert not bad_wpa_play.empty

    bad_wpa_play["proper_time_set"] = (
        bad_wpa_play["start.adj_TimeSecsRem"] >= bad_wpa_play["end.adj_TimeSecsRem"]
    )

    search_cols = sorted(set(wp_start_columns + wp_end_columns))
    report_cols = [
        "id",
        "text",
        "lead_play_text",
        "change_of_poss",
        "change_of_pos_team",
        "wp_after_case",
        "wp_before",
        "wp_after",
        "proper_time_set",
    ] + search_cols
    LOGGER.info(bad_wpa_play[report_cols].to_json(orient = "records", indent = 2))

    assert bad_wpa_play.proper_time_set.all()

0 comments on commit 6ba177c

Please sign in to comment.