Skip to content

Commit

Permalink
Merge pull request #14 from DesiPilla/add-playoff-simulations
Browse files Browse the repository at this point in the history
Add playoff simulations
  • Loading branch information
DesiPilla authored Nov 8, 2022
2 parents fb36e43 + b99aae8 commit 2b43d1c
Show file tree
Hide file tree
Showing 6 changed files with 540 additions and 249 deletions.
83 changes: 60 additions & 23 deletions src/doritostats/analytic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,7 @@
from typing import Callable, Dict, List, Optional, Tuple
from espn_api.football import League, Team, Player
from espn_api.football.box_score import BoxScore
from src.doritostats.filter_utils import (
filter_df,
get_any_records,
exclude_most_recent_week,
)

from src.doritostats.filter_utils import get_any_records, exclude_most_recent_week

def get_lineup(
league: League, team: Team, week: int, box_scores: Optional[List[BoxScore]] = None
Expand Down Expand Up @@ -190,6 +185,46 @@ def get_season_luck_indices(league: League, week: int) -> Dict[Team, float]:
return luck_indices


def get_remaining_schedule_difficulty(
    team: "Team", week: int, strength: str = "points_for"
) -> float:
    """Return the average strength of a team's remaining opponents.

    The remaining opponents are `team.schedule[week - 1:]`, i.e. the opponent
    for `week` and every opponent after it. Each opponent is judged only on
    the games they played before `week` (their first `week - 1` results).
    An opponent that appears more than once in the remaining schedule is
    counted once per appearance.

    Args:
        team (Team): Team whose remaining schedule is evaluated.
        week (int): First week (1-indexed) that counts as "remaining".
        strength (str): How an opponent's strength is defined.
            - "points_for": average points scored by the remaining opponents.
            - "win_pct": combined winning percentage of the remaining opponents
              (wins / (wins + losses); ties and unplayed games are ignored).

    Returns:
        float: The schedule difficulty, or NaN when there is nothing to
            average (no remaining opponents, or no completed games yet).

    Raises:
        ValueError: If `week` is less than 1 or `strength` is not recognized.
    """
    if strength not in ("points_for", "win_pct"):
        raise ValueError("Unrecognized parameter passed for `strength`")
    if week < 1:
        # A non-positive week would turn the slices below into negative
        # indices and silently select the wrong games.
        raise ValueError("`week` must be 1 or greater")

    games_played = week - 1
    remaining_schedule = team.schedule[games_played:]

    if strength == "points_for":
        # Collect every score posted by a remaining opponent before `week`.
        # Scores of 0 or less are dropped: they are treated as weeks that
        # have not been played yet (not always applicable).
        played_scores = [
            score
            for opp in remaining_schedule
            for score in opp.scores[:games_played]
            if score > 0
        ]
        if not played_scores:
            return float("nan")
        return float(np.mean(played_scores))

    # strength == "win_pct"
    # Collect every outcome recorded by a remaining opponent before `week`.
    past_outcomes = [
        outcome
        for opp in remaining_schedule
        for outcome in opp.outcomes[:games_played]
    ]
    wins = past_outcomes.count("W")
    losses = past_outcomes.count("L")

    # Only decided games count -- ties and games that have not occurred yet
    # are excluded from the denominator.
    decided_games = wins + losses
    if decided_games == 0:
        return float("nan")
    return wins / decided_games


def sort_lineups_by_func(
league: League, week: int, func, box_scores=None, **kwargs
) -> List[Team]:
Expand Down Expand Up @@ -317,15 +352,15 @@ def print_franchise_records(
n (int): How far down the record list to check (defaults to 5)
"""
# Get a list of all active teams that have been in the league for 2+ years
current_teams = filter_df(df, year=df.year.max()).team_owner.unique()
current_teams = df.query(f"year == {df.year.max()}").team_owner.unique()
list_of_teams = df.groupby(["team_owner"]).nunique()
list_of_teams = list_of_teams[
(list_of_teams.year > 1) & list_of_teams.index.isin(current_teams)
].index.tolist()

for team_owner in list_of_teams:
# Get all rows for the given team
team_df = filter_df(df, team_owner=team_owner)
team_df = df.query(f"team_owner == {team_owner}")

# Get any records for that team
records_df = get_any_records(
Expand Down Expand Up @@ -356,7 +391,7 @@ def get_wins_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
Returns:
pd.Series: Ordered leaderboard by career wins
"""
df = filter_df(df, outcome="win", meaningful=True)
df = df.query(f"outcome == 'win' & is_meaningful_game == True")
leaderboard_df = (
df.groupby("team_owner")
.count()["outcome"]
Expand All @@ -376,7 +411,7 @@ def get_losses_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
Returns:
pd.Series: Ordered leaderboard by career losses
"""
df = filter_df(df, outcome="lose", meaningful=True)
df = df.query(f"outcome == 'lose' & is_meaningful_game == True")
leaderboard_df = (
df.groupby("team_owner")
.count()["outcome"]
Expand Down Expand Up @@ -465,20 +500,22 @@ def get_division_standings(league: League) -> Dict[str, List[Team]]:
def game_of_the_week_stats(
league: League, df: pd.DataFrame, owner1: str, owner2: str
) -> None:
gow_df = filter_df(df, team_owner=owner1, opp_owner=owner2, meaningful=True)
gow_df = df.query(
f"team_owner == {owner1} & opp_owner == {owner2} & is_meaningful_game == True"
)
gow_df.sort_values(["year", "week"], ascending=True, inplace=True)

print(
"{} has won {} / {} matchups.".format(
owner1, len(filter_df(gow_df, outcome="win")), len(gow_df)
owner1, len(gow_df.query(f"outcome == 'win'")), len(gow_df)
)
)
print(
"{} has won {} / {} matchups.".format(
owner2, len(filter_df(gow_df, outcome="lose")), len(gow_df)
owner2, len(gow_df.query(f"outcome == 'lose'")), len(gow_df)
)
)
print("There have been {} ties".format(len(filter_df(gow_df, outcome="tie"))))
print("There have been {} ties".format(len(gow_df.query(f"outcome == 'tie'"))))

last_matchup = gow_df.iloc[-1]
print(
Expand All @@ -502,8 +539,8 @@ def game_of_the_week_stats(
print(f"{owner1} has a record of {team1.wins}-{team1.losses}-{team1.ties}")
print(
"They have averaged {:.2f} points per game.".format(
filter_df(
df, team_owner=owner1, year=league.year, meaningful=True
df.query(
f"team_owner == {owner1} & year == {league.year} & is_meaningful_game == True"
).team_score.mean()
)
)
Expand All @@ -519,8 +556,8 @@ def game_of_the_week_stats(
print(f"{owner2} has a record of {team2.wins}-{team2.losses}-{team2.ties}")
print(
"They have averaged {:.2f} points per game.".format(
filter_df(
df, team_owner=owner2, year=league.year, meaningful=True
df.query(
f"team_owner == {owner2} & year == {league.year} & is_meaningful_game == True"
).team_score.mean()
)
)
Expand All @@ -543,7 +580,7 @@ def weekly_stats_analysis(df: pd.DataFrame, year: int, week: int) -> None:
week (int): Week
"""

df = filter_df(df, meaningful=True)
df = df.query("is_meaningful_game == True")

print("----------------------------------------------------------------")
print(
Expand Down Expand Up @@ -819,11 +856,11 @@ def season_stats_analysis(
week (int, optional): Maximum week to include. Defaults to None.
"""
if week is None:
week = filter_df(df, year=df.year.max()).week.max()
week = df.query(f"year == {df.year.max()}").week.max()

df = filter_df(df, meaningful=True)
df_current_year = filter_df(df, year=league.year)
df_current_week = filter_df(df, year=league.year, week=league.current_week - 1)
df = df.query("is_meaningful_game == True")
df_current_year = df.query(f"year == {league.year}")
df_current_week = df_current_year.query(f"week == {league.current_week - 1}")

print("----------------------------------------------------------------")
print(
Expand Down
35 changes: 28 additions & 7 deletions src/doritostats/fetch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,8 @@


def set_league_endpoint(league: League) -> None:
"""Set the league's endpoint.
The endpoint looks different depending on if it is the current season or a previous season
Args:
league (League): _description_
"""
"""Set the league's endpoint."""

# Current season
if league.year >= (datetime.datetime.today() - datetime.timedelta(weeks=12)).year:
league.endpoint = (
Expand Down Expand Up @@ -286,6 +281,19 @@ def calculate_outcome(s):
df["outcome"] = df.apply(calculate_outcome, axis=1)
df["is_meaningful_game"] = df.is_regular_season | df.is_playoff

# More calculated fields
df.sort_values(["team_owner", "week"], inplace=True)
df["win"] = df.outcome == "win"
df["tie"] = df.outcome == "tie"
df["lose"] = df.outcome == "lose"
df["season_wins"] = df.groupby(["team_owner"]).win.cumsum()
df["season_ties"] = df.groupby(["team_owner"]).tie.cumsum()
df["season_losses"] = df.groupby(["team_owner"]).lose.cumsum()
df["win_pct"] = df.season_wins / df[["season_wins", "season_losses"]].sum(axis=1)
df["win_pct_entering_matchup"] = (
df.groupby(["team_owner"])["win_pct"].apply(lambda x: x.shift(1)).values
)

return df


Expand Down Expand Up @@ -428,6 +436,19 @@ def calculate_outcome(s):
df["outcome"] = df.apply(calculate_outcome, axis=1)
df["is_meaningful_game"] = df.is_regular_season | df.is_playoff

# More calculated fields
df.sort_values(["team_owner", "week"], inplace=True)
df["win"] = df.outcome == "win"
df["tie"] = df.outcome == "tie"
df["lose"] = df.outcome == "lose"
df["season_wins"] = df.groupby(["team_owner"]).win.cumsum()
df["season_ties"] = df.groupby(["team_owner"]).tie.cumsum()
df["season_losses"] = df.groupby(["team_owner"]).lose.cumsum()
df["win_pct"] = df.season_wins / df[["season_wins", "season_losses"]].sum(axis=1)
df["win_pct_entering_matchup"] = (
df.groupby(["team_owner"])["win_pct"].apply(lambda x: x.shift(1)).values
)

return df


Expand Down
99 changes: 4 additions & 95 deletions src/doritostats/filter_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,97 +2,6 @@
from typing import Optional


def filter_df(
    df: pd.DataFrame,
    team_owner: Optional[str] = None,
    opp_owner: Optional[str] = None,
    year: Optional[int] = None,
    week: Optional[int] = None,
    division: Optional[str] = None,
    meaningful: Optional[bool] = None,
    is_playoff: Optional[bool] = None,
    is_regular_season: Optional[bool] = None,
    outcome: Optional[str] = None,
) -> pd.DataFrame:
    """Filter a historical stats dataframe by some fields.

    Only records that match all supplied conditions will be returned.
    A filter left as `None` is ignored, and only the columns of the
    supplied filters need to exist in `df`.

    Args:
        df (pd.DataFrame): Historical stats dataframe
        team_owner (str): Team owner to filter to
        opp_owner (str): Opponent owner to filter to
        year (int): Year to filter to
        week (int): Week to filter to
        division (str): Division to filter to
        meaningful (bool): Required value of the `is_meaningful_game` column
        is_playoff (bool): Required value of the `is_playoff` column
        is_regular_season (bool): Required value of the `is_regular_season` column
        outcome (str): Outcome to filter to ('win', 'lose', 'tie')

    Returns:
        df (pd.DataFrame): Filtered dataframe (`df` itself when no filter is given)
    """
    # Map each dataframe column to the value requested for it.
    criteria = {
        "team_owner": team_owner,
        "opp_owner": opp_owner,
        "year": year,
        "week": week,
        "division": division,
        "is_meaningful_game": meaningful,
        "is_playoff": is_playoff,
        "is_regular_season": is_regular_season,
        "outcome": outcome,
    }

    # Combine every requested condition into one boolean mask so the frame
    # is indexed once instead of once per filter.
    mask = None
    for column, wanted in criteria.items():
        if wanted is None:
            continue
        matches = df[column] == wanted
        mask = matches if mask is None else mask & matches

    return df if mask is None else df[mask]


def exclude_df(
    df: pd.DataFrame,
    team_owner: Optional[str] = None,
    year: Optional[int] = None,
    week: Optional[int] = None,
    division: Optional[str] = None,
    meaningful: Optional[bool] = None,
    outcome: Optional[str] = None,
) -> pd.DataFrame:
    """Exclude records from a historical stats dataframe by some fields.

    Only records that match all supplied conditions will be excluded.
    A condition left as `None` is ignored. When no condition is supplied
    there is nothing to exclude and `df` is returned unchanged.

    Args:
        df (pd.DataFrame): Historical stats dataframe
        team_owner (str): Team owner to exclude
        year (int): Year to exclude
        week (int): Week to exclude
        division (str): Division to exclude
        meaningful (bool): Value of the `is_meaningful_game` column to exclude
        outcome (str): Outcome to exclude ('win', 'lose', 'tie')

    Returns:
        df (pd.DataFrame): Filtered dataframe
    """
    # Map each dataframe column to the value that should be excluded.
    # The "meaningful" flag lives in the `is_meaningful_game` column.
    criteria = {
        "team_owner": team_owner,
        "year": year,
        "week": week,
        "division": division,
        "is_meaningful_game": meaningful,
        "outcome": outcome,
    }

    # Build one boolean Series marking the rows that match every supplied
    # condition; those are the rows to drop.
    mask = None
    for column, unwanted in criteria.items():
        if unwanted is None:
            continue
        matches = df[column] == unwanted
        mask = matches if mask is None else mask & matches

    return df if mask is None else df[~mask]


def exclude_most_recent_week(df: pd.DataFrame) -> pd.DataFrame:
"""Filter out the most recent week of matchups from the historical stats dataframe.
Expand All @@ -103,8 +12,8 @@ def exclude_most_recent_week(df: pd.DataFrame) -> pd.DataFrame:
pd.DataFrame: Filtered dataframe
"""
year_to_exclude = df.year.max()
week_to_exclude = filter_df(df, year=year_to_exclude).week.max()
return exclude_df(df, year=year_to_exclude, week=week_to_exclude)
week_to_exclude = df.query(f"year == {year_to_exclude}").week.max()
return df.query(f"~(year == {year_to_exclude} & week == {week_to_exclude})")


def get_any_records(
Expand All @@ -131,7 +40,7 @@ def get_any_records(
sub_df["rank"] = sub_df[stat].rank(ascending=(not high_first), method="min")

# Keep only the top n records, in the year-week of note
sub_df = sub_df[sub_df["rank"] <= n]
sub_df = filter_df(sub_df, year=year, week=week)
sub_df = sub_df[sub_df['rank'] <= n]
sub_df = sub_df.query(f"year == {year} & week == {week}")

return sub_df[["year", "week", "team_owner", stat, "rank"]]
Loading

0 comments on commit 2b43d1c

Please sign in to comment.