Skip to content

Commit

Permalink
initial commit of pre-2022 season build
Browse files Browse the repository at this point in the history
  • Loading branch information
Kazink36 committed Aug 16, 2023
1 parent 92b3e22 commit 6719ef9
Show file tree
Hide file tree
Showing 34 changed files with 3,159 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
^cfbseedR\.Rproj$
^\.Rproj\.user$
^data-raw$
^README\.Rmd$
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Package: cfbseedR
Package: cfbsimR
Title: What the Package Does (One Line, Title Case)
Version: 0.0.0.9000
Authors@R:
Expand All @@ -9,4 +9,7 @@ License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
license
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.2
RoxygenNote: 7.2.0
Depends:
R (>= 2.10)
LazyData: true
23 changes: 23 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,2 +1,25 @@
# Generated by roxygen2: do not edit by hand

S3method(summary,cfbsimR_simulation)
export(compute_conference_seeds)
export(compute_division_ranks)
export(compute_draft_order)
export(load_cfb_games)
export(simulate_cfb)
import(dplyr)
import(gsubfn)
importFrom(cli,symbol)
importFrom(crayon,red)
importFrom(furrr,furrr_options)
importFrom(furrr,future_map)
importFrom(furrr,future_map_dfr)
importFrom(future,plan)
importFrom(glue,glue)
importFrom(glue,glue_collapse)
importFrom(magrittr,"%>%")
importFrom(progressr,progressor)
importFrom(purrr,pluck)
importFrom(rlang,inform)
importFrom(stats,rnorm)
importFrom(tibble,is_tibble)
importFrom(tidyr,pivot_longer)
Binary file added R/.DS_Store
Binary file not shown.
22 changes: 22 additions & 0 deletions R/cfbsimR-package.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' @keywords internal
"_PACKAGE"

# The following block is used by usethis to automatically manage
# roxygen namespace tags. Modify with care!
## usethis namespace: start
#' @import dplyr
#' @import gsubfn
#' @importFrom cli symbol
#' @importFrom crayon red
#' @importFrom furrr future_map future_map_dfr furrr_options
#' @importFrom future plan
#' @importFrom glue glue glue_collapse
#' @importFrom magrittr %>%
#' @importFrom progressr progressor
#' @importFrom purrr pluck
#' @importFrom rlang inform
#' @importFrom stats rnorm
#' @importFrom tibble is_tibble
#' @importFrom tidyr pivot_longer
## usethis namespace: end
NULL
100 changes: 100 additions & 0 deletions R/compute_conference_seeds.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#' Compute NFL Playoff Seedings using Game Results and Divisional Rankings
#'
#' @inheritParams compute_division_ranks
#' @param teams The division standings data frame as computed by
#' \code{\link{compute_division_ranks}}
#' @param playoff_seeds Number of playoff teams per conference (increased
#' in 2020 from 6 to 7).
#'
#' @returns A data frame of division standings including playoff seeds and the
#' week in which the season ended for the respective team (\code{exit}).
#' @returns A list of two data frames:
#' \describe{
#' \item{standings}{Division standings including playoff seeds.}
#' \item{h2h}{A data frame that is used for head-to-head tiebreakers across the
#' tie-breaking functions.}
#' }
#' @seealso The examples [on the package website](https://nflseedr.com/articles/articles/nflseedR.html)
#' @export
#' @examples
#' \donttest{
#' # Change some options for better output
#' old <- options(list(digits = 3, tibble.print_min = 64))
#' library(dplyr, warn.conflicts = FALSE)
#'
#' nflseedR::load_sharpe_games() %>%
#' dplyr::filter(season %in% 2019:2020) %>%
#' dplyr::select(sim = season, game_type, week, away_team, home_team, result) %>%
#' nflseedR::compute_division_ranks() %>%
#' nflseedR::compute_conference_seeds(h2h = .$h2h) %>%
#' purrr::pluck("standings")
#'
#' # Restore old options
#' options(old)
#' }
compute_conference_seeds <- function(teams,
h2h = NULL,
tiebreaker_depth = 3,
.debug = FALSE,
playoff_seeds = 7) {
# catch invalid input
if (!isTRUE(tiebreaker_depth %in% 1:3)) {
stop(
"The argument `tiebreaker_depth` has to be",
"a single value in the range of 1-3!"
)
}

if (!is_tibble(teams)) teams <- teams$standings

if (!any((names(teams) %in% "div_rank")) | !is.data.frame(teams)) {
stop(
"The argument `teams` has to be a data frame including ",
"the variable `div_rank` as computed by `compute_division_ranks()`!"
)
}

if(is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE){
stop("You asked for tiebreakers but the argument `h2h` is NULL. ",
"Did you forget to pass the `h2h` data frame? It is computed with the ",
"function `compute_division_ranks()`."
)
}

teams <- teams %>%
mutate(conf_rank = NA_real_)

# seed loop
for (seed_num in seq_len(playoff_seeds))
{
report(paste0("Calculating seed #", seed_num))

# find teams at this seed
update <- teams %>%
filter(is.na(conf_rank)) %>%
mutate(div_winner = (div_rank == 1)) %>%
group_by(sim, conf) %>%
filter(div_winner == max(div_winner)) %>%
filter(win_pct == max(win_pct)) %>%
mutate(conf_rank = ifelse(n() == 1, as.numeric(seed_num), conf_rank)) %>%
ungroup() %>%
group_by(sim, conf, division) %>%
mutate(div_best_left = (div_rank == min(div_rank))) %>%
ungroup() %>%
break_conference_ties(seed_num, h2h = h2h, tb_depth = tiebreaker_depth, .debug = .debug)

# store updates
teams <- teams %>%
left_join(update, by = c("sim", "team")) %>%
mutate(conf_rank = ifelse(!is.na(new_rank), new_rank, conf_rank)) %>%
select(-new_rank)
} # end conference rank loop

# rename conference rank to seed
teams <- teams %>%
rename(seed = conf_rank) %>%
mutate(exit = ifelse(is.na(seed), max_reg_week, NA_real_)) %>%
select(-max_reg_week)

return(list(standings = teams, h2h = h2h))
}
215 changes: 215 additions & 0 deletions R/compute_division_ranks.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
#' Compute NFL Division Rankings using Game Results
#'
#' @param games A data frame containing real or simulated game scores. The
#' following variables are required:
#' \describe{
#' \item{sim}{A simulation ID. Normally 1 - n simulated seasons.}
#' \item{game_type}{One of 'REG', 'WC', 'DIV', 'CON', 'SB' indicating if a game was a regular season game or one of the playoff rounds.}
#' \item{week}{The week of the corresponding NFL season.}
#' \item{away_team}{Team abbreviation of the away team (please see
#' \code{\link{divisions}} for valid team abbreviations).}
#' \item{home_team}{Team abbreviation of the home team (please see
#' \code{\link{divisions}} for valid team abbreviations).}
#' \item{result}{Equals home score - away score.}
#' }
#' @param teams This parameter is optional. If it is \code{NULL} the function
#' will compute it internally, otherwise it has to be a data frame of all teams
#' contained in the \code{games} data frame repeated for each simulation ID
#' (\code{sim}). The following variables are required:
#' \describe{
#' \item{sim}{A simulation ID. Normally 1 - n simulated seasons.}
#' \item{team}{Team abbreviation of the team (please see
#' \code{\link{divisions}} for valid team abbreviations).}
#' \item{conf}{Conference abbreviation of the team (please see
#' \code{\link{divisions}} for valid team abbreviations).}
#' \item{division}{Division of the team (please see
#' \code{\link{divisions}} for valid division names).}
#' }
#' @param tiebreaker_depth A single value equal to 1, 2, or 3. The default is 3. The
#' value controls the depth of tiebreakers that shall be applied. The deepest
#' currently implemented tiebreaker is strength of schedule. The following
#' values are valid:
#' \describe{
#' \item{tiebreaker_depth = 1}{Break all ties with a coinflip. Fastest variant.}
#' \item{tiebreaker_depth = 2}{Apply head-to-head and division win percentage tiebreakers. Random if still tied.}
#' \item{tiebreaker_depth = 3}{Apply all tiebreakers through strength of schedule. Random if still tied.}
#' }
#' @param .debug Either \code{TRUE} or \code{FALSE}. Controls whether additional
#' messages are printed to the console showing what the tie-breaking algorithms
#' are currently performing.
#' @param h2h A data frame that is used for head-to-head tiebreakers across the
#' tie-breaking functions. It is computed by the function
#' \code{\link{compute_division_ranks}}.
#' @returns A list of two data frames:
#' \describe{
#' \item{standings}{Division standings.}
#' \item{h2h}{A data frame that is used for head-to-head tiebreakers across the
#' tie-breaking functions.}
#' }
#' @seealso The examples [on the package website](https://nflseedr.com/articles/articles/nflseedR.html)
#' @export
#' @examples
#' \donttest{
#' # Change some options for better output
#' old <- options(list(digits = 3, tibble.print_min = 64))
#' library(dplyr, warn.conflicts = FALSE)
#'
#' nflseedR::load_sharpe_games() %>%
#' dplyr::filter(season %in% 2019:2020) %>%
#' dplyr::select(sim = season, game_type, week, away_team, home_team, result) %>%
#' nflseedR::compute_division_ranks() %>%
#' purrr::pluck("standings")
#'
#' # Restore old options
#' options(old)
#' }
compute_division_ranks <- function(games,
teams = NULL,
tiebreaker_depth = 3,
.debug = FALSE,
h2h = NULL) {
# catch invalid input
if (!isTRUE(tiebreaker_depth %in% 1:3)) {
stop(
"The argument `tiebreaker_depth` has to be",
"a single value in the range of 1-3!"
)
}

required_vars <- c(
"sim",
"game_type",
"week",
"away_team",
"home_team",
"result"
)

if (!sum(names(games) %in% required_vars, na.rm = TRUE) >= 6 | !is.data.frame(games)) {
stop(
"The argument `games` has to be a data frame including ",
"all of the following variables: ",
glue_collapse(required_vars, sep = ", ", last = " and "),
"!"
)
}

if (is.null(teams)) { # compute teams df from games df
pivot_games <- games %>%
select(sim, home_team, away_team) %>%
pivot_longer(cols = c("home_team", "away_team"), values_to = "team") %>%
select(sim, team)

teams <- bind_rows(
data.frame(team = unique(games$away_team)),
data.frame(team = unique(games$home_team))
) %>%
distinct() %>%
inner_join(cfbsimR::divisions %>% select(-"sdiv"), by = "team") %>%
left_join(pivot_games, by = "team") %>%
select(sim, everything()) %>%
distinct() %>%
arrange(division, team, sim)
}

# double games
games_doubled <- double_games(games)

# record of each team
report("Calculating team data")
teams <- teams %>%
inner_join(games_doubled, by = c("sim", "team")) %>%
filter(game_type == "REG") %>%
group_by(sim, conf, division, team) %>%
summarize(games = n(), wins = sum(outcome), true_wins = sum(outcome == 1)) %>%
ungroup()

# add in tiebreaker info
teams <- teams %>%
inner_join(games_doubled, by = c("sim", "team")) %>%
filter(game_type == "REG") %>%
inner_join(teams,
by = c("sim" = "sim", "opp" = "team"),
suffix = c("", "_opp")
) %>%
mutate(
win_pct = wins / games,
div_game = ifelse(division == division_opp & conf == conf_opp,
1, 0),
conf_game = ifelse(conf == conf_opp, 1, 0)
) %>%
group_by(sim, conf, division, team, games, wins, true_wins, win_pct) %>%
summarize(
div_pct = ifelse(sum(div_game) == 0, 0.5,
sum(div_game * outcome) / sum(div_game)
),
conf_pct = ifelse(sum(conf_game) == 0, 0.5,
sum(conf_game * outcome) / sum(conf_game)
),
sov = ifelse(sum(outcome == 1) == 0, 0,
sum(wins_opp * (outcome == 1)) /
sum(games_opp * (outcome == 1))
),
sos = sum(wins_opp) / sum(games_opp)
) %>%
ungroup()

# below only if there are tiebreakers
if (is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE) {
report("Calculating head to head")
h2h <- teams %>%
select(sim, team) %>%
inner_join(teams %>% select(sim, team),
by = "sim", suffix = c("", "_opp")
) %>%
rename(opp = team_opp) %>%
arrange(sim, team, opp) %>%
left_join(games_doubled %>% filter(game_type == "REG"),
by = c("sim", "team", "opp")
) %>%
group_by(sim, team, opp) %>%
summarize(
h2h_games = sum(!is.na(outcome)),
h2h_wins = sum(outcome, na.rm = TRUE),
h2h_played = ifelse(h2h_games > 0, 1, 0)
) %>%
ungroup()
}

#### FIND DIVISION RANKS ####

# initialize division rank
teams <- teams %>%
filter(team != "FCS") %>%
mutate(div_rank = NA_real_)

# determine division ranks
dr <- 0
while (any(is.na(teams$div_rank))) {
# increment division rank
dr <- dr + 1
report(paste0("Calculating division rank #", dr))

# update teams with this rank
update <- teams %>%
filter(is.na(div_rank)) %>%
group_by(sim, conf, division) %>%
filter(conf_pct == max(conf_pct)) %>%
mutate(div_rank = ifelse(n() == 1, dr, div_rank)) %>%
ungroup() %>%
break_division_ties(dr, h2h = h2h, tb_depth = tiebreaker_depth, .debug = .debug)

# store updates
teams <- teams %>%
left_join(update, by = c("sim", "team")) %>%
mutate(div_rank = ifelse(!is.na(new_rank), new_rank, div_rank)) %>%
select(-new_rank)
}

max_reg_week <- max(games$week[games$game_type == "REG"], na.rm = TRUE)

teams <- teams %>%
mutate(max_reg_week = max_reg_week)

return(list(standings = teams, h2h = h2h))
}
Loading

0 comments on commit 6719ef9

Please sign in to comment.