Merge pull request #14 from DesiPilla/add-playoff-simulations

Add playoff simulations
DesiPilla · Nov 8, 2022 · 2b43d1c · 2b43d1c
2 parents fb36e43 + b99aae8
commit 2b43d1c
Show file tree

Hide file tree

Showing 6 changed files with 540 additions and 249 deletions.
diff --git a/src/doritostats/analytic_utils.py b/src/doritostats/analytic_utils.py
@@ -4,12 +4,7 @@
 from typing import Callable, Dict, List, Optional, Tuple
 from espn_api.football import League, Team, Player
 from espn_api.football.box_score import BoxScore
-from src.doritostats.filter_utils import (
-    filter_df,
-    get_any_records,
-    exclude_most_recent_week,
-)
-
+from src.doritostats.filter_utils import get_any_records, exclude_most_recent_week
 
 def get_lineup(
     league: League, team: Team, week: int, box_scores: Optional[List[BoxScore]] = None
@@ -190,6 +185,46 @@ def get_season_luck_indices(league: League, week: int) -> Dict[Team, float]:
     return luck_indices
 
 
+def get_remaining_schedule_difficulty(
+    team: Team, week: int, strength: str = "points_for"
+):
+    """
+    This function returns the average score of a team's remaining opponents.
+
+    The `strength` parameter defines how an opponent's "strength" is defined.
+        - "points_for" means that difficulty is defined by the average points scored by each of their remaining opponents.
+        - "win_pct" means that difficulty is defined by the average winning percentage of each of their remaining opponents.
+
+    """
+    remaining_schedule = team.schedule[week - 1 :]
+
+    if strength == "points_for":
+        # Get all scores from remaining opponents for the weeks before the specified week
+        remaining_strength = np.array(
+            [opp.scores[: week - 1] for opp in remaining_schedule]
+        ).flatten()
+
+        # Exclude weeks that haven't occurred yet (not always applicable)
+        remaining_strength = remaining_strength[remaining_strength > 0]
+
+        # Return average score
+        return remaining_strength.mean()
+
+    elif strength == "win_pct":
+        # Get all outcomes from remaining opponents for the weeks before the specified week
+        remaining_strength = np.array(
+            [opp.outcomes[: week - 1] for opp in remaining_schedule]
+        ).flatten()
+
+        # Divide # of wins by (# of wins + # of losses) -- this excludes matches that tied or have not occurred yet
+        return sum(remaining_strength == "W") / sum(
+            (remaining_strength == "W") | (remaining_strength == "L")
+        )
+
+    else:
+        raise Exception("Unrecognized parameter passed for `strength`")
+
+
 def sort_lineups_by_func(
     league: League, week: int, func, box_scores=None, **kwargs
 ) -> List[Team]:
@@ -317,15 +352,15 @@ def print_franchise_records(
         n (int): How far down the record list to check (defaults to 5)
     """
     # Get a list of all active teams that have been in the league for 2+ years
-    current_teams = filter_df(df, year=df.year.max()).team_owner.unique()
+    current_teams = df.query(f"year == {df.year.max()}").team_owner.unique()
     list_of_teams = df.groupby(["team_owner"]).nunique()
     list_of_teams = list_of_teams[
         (list_of_teams.year > 1) & list_of_teams.index.isin(current_teams)
     ].index.tolist()
 
     for team_owner in list_of_teams:
         # Get all rows for the given team
-        team_df = filter_df(df, team_owner=team_owner)
+        team_df = df.query(f"team_owner == {team_owner}")
 
         # Get any records for that team
         records_df = get_any_records(
@@ -356,7 +391,7 @@ def get_wins_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
     Returns:
         pd.Series: Ordered leaderboard by career wins
     """
-    df = filter_df(df, outcome="win", meaningful=True)
+    df = df.query(f"outcome == 'win' & is_meaningful_game == True")
     leaderboard_df = (
         df.groupby("team_owner")
         .count()["outcome"]
@@ -376,7 +411,7 @@ def get_losses_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
     Returns:
         pd.Series: Ordered leaderboard by career losses
     """
-    df = filter_df(df, outcome="lose", meaningful=True)
+    df = df.query(f"outcome == 'lose' & is_meaningful_game == True")
     leaderboard_df = (
         df.groupby("team_owner")
         .count()["outcome"]
@@ -465,20 +500,22 @@ def get_division_standings(league: League) -> Dict[str, List[Team]]:
 def game_of_the_week_stats(
     league: League, df: pd.DataFrame, owner1: str, owner2: str
 ) -> None:
-    gow_df = filter_df(df, team_owner=owner1, opp_owner=owner2, meaningful=True)
+    gow_df = df.query(
+        f"team_owner == {owner1} & opp_owner == {owner2} & is_meaningful_game == True"
+    )
     gow_df.sort_values(["year", "week"], ascending=True, inplace=True)
 
     print(
         "{} has won {} / {} matchups.".format(
-            owner1, len(filter_df(gow_df, outcome="win")), len(gow_df)
+            owner1, len(gow_df.query(f"outcome == 'win'")), len(gow_df)
         )
     )
     print(
         "{} has won {} / {} matchups.".format(
-            owner2, len(filter_df(gow_df, outcome="lose")), len(gow_df)
+            owner2, len(gow_df.query(f"outcome == 'lose'")), len(gow_df)
         )
     )
-    print("There have been {} ties".format(len(filter_df(gow_df, outcome="tie"))))
+    print("There have been {} ties".format(len(gow_df.query(f"outcome == 'tie'"))))
 
     last_matchup = gow_df.iloc[-1]
     print(
@@ -502,8 +539,8 @@ def game_of_the_week_stats(
     print(f"{owner1} has a record of {team1.wins}-{team1.losses}-{team1.ties}")
     print(
         "They have averaged {:.2f} points per game.".format(
-            filter_df(
-                df, team_owner=owner1, year=league.year, meaningful=True
+            df.query(
+                f"team_owner == {owner1} & year == {league.year} & is_meaningful_game == True"
             ).team_score.mean()
         )
     )
@@ -519,8 +556,8 @@ def game_of_the_week_stats(
     print(f"{owner2} has a record of {team2.wins}-{team2.losses}-{team2.ties}")
     print(
         "They have averaged {:.2f} points per game.".format(
-            filter_df(
-                df, team_owner=owner2, year=league.year, meaningful=True
+            df.query(
+                f"team_owner == {owner2} & year == {league.year} & is_meaningful_game == True"
             ).team_score.mean()
         )
     )
@@ -543,7 +580,7 @@ def weekly_stats_analysis(df: pd.DataFrame, year: int, week: int) -> None:
         week (int): Week
     """
 
-    df = filter_df(df, meaningful=True)
+    df = df.query("is_meaningful_game == True")
 
     print("----------------------------------------------------------------")
     print(
@@ -819,11 +856,11 @@ def season_stats_analysis(
         week (int, optional): Maximum week to include. Defaults to None.
     """
     if week is None:
-        week = filter_df(df, year=df.year.max()).week.max()
+        week = df.query(f"year == {df.year.max()}").week.max()
 
-    df = filter_df(df, meaningful=True)
-    df_current_year = filter_df(df, year=league.year)
-    df_current_week = filter_df(df, year=league.year, week=league.current_week - 1)
+    df = df.query("is_meaningful_game == True")
+    df_current_year = df.query(f"year == {league.year}")
+    df_current_week = df_current_year.query(f"week == {league.current_week - 1}")
 
     print("----------------------------------------------------------------")
     print(

diff --git a/src/doritostats/fetch_utils.py b/src/doritostats/fetch_utils.py
@@ -14,13 +14,8 @@
 
 
 def set_league_endpoint(league: League) -> None:
-    """Set the league's endpoint.
-
-    The endpoint looks different depending on if it is the current season or a previous season
-
-    Args:
-        league (League): _description_
-    """
+    """Set the league's endpoint."""
+
     # Current season
     if league.year >= (datetime.datetime.today() - datetime.timedelta(weeks=12)).year:
         league.endpoint = (
@@ -286,6 +281,19 @@ def calculate_outcome(s):
     df["outcome"] = df.apply(calculate_outcome, axis=1)
     df["is_meaningful_game"] = df.is_regular_season | df.is_playoff
 
+    # More calculated fields
+    df.sort_values(["team_owner", "week"], inplace=True)
+    df["win"] = df.outcome == "win"
+    df["tie"] = df.outcome == "tie"
+    df["lose"] = df.outcome == "lose"
+    df["season_wins"] = df.groupby(["team_owner"]).win.cumsum()
+    df["season_ties"] = df.groupby(["team_owner"]).tie.cumsum()
+    df["season_losses"] = df.groupby(["team_owner"]).lose.cumsum()
+    df["win_pct"] = df.season_wins / df[["season_wins", "season_losses"]].sum(axis=1)
+    df["win_pct_entering_matchup"] = (
+        df.groupby(["team_owner"])["win_pct"].apply(lambda x: x.shift(1)).values
+    )
+
     return df
 
 
@@ -428,6 +436,19 @@ def calculate_outcome(s):
     df["outcome"] = df.apply(calculate_outcome, axis=1)
     df["is_meaningful_game"] = df.is_regular_season | df.is_playoff
 
+    # More calculated fields
+    df.sort_values(["team_owner", "week"], inplace=True)
+    df["win"] = df.outcome == "win"
+    df["tie"] = df.outcome == "tie"
+    df["lose"] = df.outcome == "lose"
+    df["season_wins"] = df.groupby(["team_owner"]).win.cumsum()
+    df["season_ties"] = df.groupby(["team_owner"]).tie.cumsum()
+    df["season_losses"] = df.groupby(["team_owner"]).lose.cumsum()
+    df["win_pct"] = df.season_wins / df[["season_wins", "season_losses"]].sum(axis=1)
+    df["win_pct_entering_matchup"] = (
+        df.groupby(["team_owner"])["win_pct"].apply(lambda x: x.shift(1)).values
+    )
+
     return df
 
 

diff --git a/src/doritostats/filter_utils.py b/src/doritostats/filter_utils.py
@@ -2,97 +2,6 @@
 from typing import Optional
 
 
-def filter_df(
-    df: pd.DataFrame,
-    team_owner: Optional[str] = None,
-    opp_owner: Optional[str] = None,
-    year: Optional[int] = None,
-    week: Optional[int] = None,
-    division: Optional[str] = None,
-    meaningful: Optional[bool] = None,
-    is_playoff: Optional[bool] = None,
-    is_regular_season: Optional[bool] = None,
-    outcome: Optional[str] = None,
-) -> pd.DataFrame:
-    """Filter a historical stats dataframe by some fields.
-    Only records that match all conditions will be returned.
-
-    Args:
-        df (pd.DataFrame): Historical stats dataframe
-        team_owner (str): Team owner to filter to
-        opp_owner (str): Opponent owner to filter to
-        year (int): Year to filter to
-        week (int): Week to filter to
-        division (str): Division to filter to
-        meaningful (bool): Include only 'meaningful' games
-        is_playoff_game (bool): Include only playoff games games
-        is_regular_season (bool): Include only regular season games
-        outcome (str): Outcome to filter to ('win', 'lose', 'tie')
-
-    Returns:
-        df (pd.DataFrame): Filtered dataframe
-    """
-    if team_owner is not None:
-        df = df[df.team_owner == team_owner]
-    if opp_owner is not None:
-        df = df[df.opp_owner == opp_owner]
-    if year is not None:
-        df = df[df.year == year]
-    if week is not None:
-        df = df[df.week == week]
-    if division is not None:
-        df = df[df.division == division]
-    if meaningful is not None:
-        df = df[df.is_meaningful_game == meaningful]
-    if is_playoff is not None:
-        df = df[df.is_playoff == is_playoff]
-    if is_regular_season is not None:
-        df = df[df.is_regular_season == is_regular_season]
-    if outcome is not None:
-        df = df[df.outcome == outcome]
-    return df
-
-
-def exclude_df(
-    df: pd.DataFrame,
-    team_owner: Optional[str] = None,
-    year: Optional[int] = None,
-    week: Optional[int] = None,
-    division: Optional[str] = None,
-    meaningful: Optional[bool] = None,
-    outcome: Optional[str] = None,
-) -> pd.DataFrame:
-    """Filter a historical stats dataframe by some fields.
-    Only records that match all conditions will be excluded.
-
-    Args:
-        df (pd.DataFrame): Historical stats dataframe
-        team_owner (str): Team owner to exclude
-        year (int): Year to exclude
-        week (int): Week to exclude
-        division (str): Division to exclude
-        meaningful (bool): Exclude only 'meaningful' games
-        outcome (str): Outcome to exclude to ('win', 'lose', 'tie')
-
-    Returns:
-        df (pd.DataFrame): Filtered dataframe
-    """
-    conditions = [True] * len(df)
-    if team_owner is not None:
-        conditions &= df.team_owner == team_owner
-    if year is not None:
-        conditions &= df.year == year
-    if week is not None:
-        conditions &= df.week == week
-    if division is not None:
-        conditions &= df.division == division
-    if meaningful is not None:
-        conditions &= df.meaningful == meaningful
-    if outcome is not None:
-        conditions &= df.outcome == outcome
-    return df[~conditions]  # type: ignore
-
-
 def exclude_most_recent_week(df: pd.DataFrame) -> pd.DataFrame:
     """Filter out the most recent week of matchups from the historical stats dataframe.
 
@@ -103,8 +12,8 @@ def exclude_most_recent_week(df: pd.DataFrame) -> pd.DataFrame:
         pd.DataFrame: Filtered dataframe
     """
     year_to_exclude = df.year.max()
-    week_to_exclude = filter_df(df, year=year_to_exclude).week.max()
-    return exclude_df(df, year=year_to_exclude, week=week_to_exclude)
+    week_to_exclude = df.query(f"year == {year_to_exclude}").week.max()
+    return df.query(f"~(year == {year_to_exclude} & week == {week_to_exclude})")
 
 
 def get_any_records(
@@ -131,7 +40,7 @@ def get_any_records(
     sub_df["rank"] = sub_df[stat].rank(ascending=(not high_first), method="min")
 
     # Keep only the top n records, in the year-week of note
-    sub_df = sub_df[sub_df["rank"] <= n]
-    sub_df = filter_df(sub_df, year=year, week=week)
+    sub_df = sub_df[sub_df['rank'] <= n]
+    sub_df = sub_df.query(f"year == {year} & week == {week}")
 
     return sub_df[["year", "week", "team_owner", stat, "rank"]]