show_best tests for censored regression models (#156)

* initial tests for #118 * return 5 configs * missing %>% * reduce chances of ties affecting results * trying to avoid irreproducible results across runs and OS'es * Apply suggestions from code review Co-authored-by: Hannah Frick <[email protected]> * update for latest tune version * updated snapshots * updated snapshots with new current package versions --------- Co-authored-by: ‘topepo’ <‘[email protected]’> Co-authored-by: Hannah Frick <[email protected]>
tidymodels · Jan 17, 2024 · c7bc3bd · c7bc3bd
1 parent 7c6d473
commit c7bc3bd
Show file tree

Hide file tree

Showing 9 changed files with 499 additions and 16 deletions.
diff --git a/tests/testthat/_snaps/survival-tune-show-best.md b/tests/testthat/_snaps/survival-tune-show-best.md
@@ -0,0 +1,183 @@
+# show_best with censored data - integrated metric - grid
+
+    Code
+      show_best(grid_int_res)
+    Condition
+      Warning in `show_best()`:
+      No value of `metric` was given; "brier_survival_integrated" will be used.
+    Output
+      # A tibble: 5 x 7
+        cost_complexity .metric               .estimator    mean     n std_err .config
+                  <dbl> <chr>                 <chr>        <dbl> <int>   <dbl> <chr>  
+      1        0.000126 brier_survival_integ~ standard   0.00791    10 0.00137 Prepro~
+      2        0.000251 brier_survival_integ~ standard   0.00793    10 0.00137 Prepro~
+      3        0.000200 brier_survival_integ~ standard   0.00794    10 0.00136 Prepro~
+      4        0.000158 brier_survival_integ~ standard   0.00796    10 0.00134 Prepro~
+      5        0.000316 brier_survival_integ~ standard   0.00797    10 0.00152 Prepro~
+
+# show_best with censored data - dynamic metric - bayes
+
+    Code
+      show_best(bayes_dyn_res)
+    Condition
+      Warning in `show_best()`:
+      No value of `metric` was given; "brier_survival" will be used.
+    Output
+      # A tibble: 5 x 9
+        cost_complexity .metric     .estimator .eval_time   mean     n std_err .config
+                  <dbl> <chr>       <chr>           <dbl>  <dbl> <int>   <dbl> <chr>  
+      1        1.26e- 9 brier_surv~ standard          100 0.0114    10 0.00312 Prepro~
+      2        1.31e- 6 brier_surv~ standard          100 0.0114    10 0.00312 Prepro~
+      3        3.55e- 8 brier_surv~ standard          100 0.0114    10 0.00312 Iter2  
+      4        1.00e-10 brier_surv~ standard          100 0.0114    10 0.00312 Iter3  
+      5        3.91e- 5 brier_surv~ standard          100 0.0114    10 0.00312 Prepro~
+      # i 1 more variable: .iter <int>
+
+---
+
+    Code
+      show_best(bayes_dyn_res, metric = "brier_survival", eval_time = 1)
+    Condition
+      Error in `show_best()`:
+      ! Evaluation time 1 is not in the results.
+
+---
+
+    Code
+      show_best(bayes_dyn_res, metric = "brier_survival_integrated")
+    Condition
+      Error in `show_best()`:
+      ! "brier_survival_integrated" was not in the metric set. Please choose from: "brier_survival".
+
+# show_best with censored data - static metric - anova racing
+
+    Code
+      show_best(race_stc_res)
+    Condition
+      Warning in `show_best()`:
+      No value of `metric` was given; "concordance_survival" will be used.
+    Output
+      # A tibble: 1 x 7
+        cost_complexity .metric              .estimator  mean     n std_err .config   
+                  <dbl> <chr>                <chr>      <dbl> <int>   <dbl> <chr>     
+      1         0.00001 concordance_survival standard   0.278    10  0.0147 Preproces~
+
+---
+
+    Code
+      show_best(race_stc_res, metric = "concordance_survival", eval_time = 1)
+    Condition
+      Warning in `show_best()`:
+      An evaluation time is only required when a dynamic metric is selected (and `eval_time` will thus be ignored).
+    Output
+      # A tibble: 1 x 7
+        cost_complexity .metric              .estimator  mean     n std_err .config   
+                  <dbl> <chr>                <chr>      <dbl> <int>   <dbl> <chr>     
+      1         0.00001 concordance_survival standard   0.278    10  0.0147 Preproces~
+
+---
+
+    Code
+      show_best(race_stc_res, metric = "brier_survival_integrated")
+    Condition
+      Warning:
+      Metric "concordance_survival" was used to evaluate model candidates in the race but "brier_survival_integrated" has been chosen to rank the candidates. These results may not agree with the race.
+      Error in `show_best()`:
+      ! "brier_survival_integrated" was not in the metric set. Please choose from: "concordance_survival".
+
+# show_best with censored data - static metric (+dyn) - W/L racing
+
+    Code
+      show_best(race_stc_res)
+    Condition
+      Warning in `show_best()`:
+      No value of `metric` was given; "concordance_survival" will be used.
+    Output
+      # A tibble: 5 x 8
+        cost_complexity .metric      .estimator .eval_time  mean     n std_err .config
+                  <dbl> <chr>        <chr>           <dbl> <dbl> <int>   <dbl> <chr>  
+      1       0.0706    concordance~ standard           NA 0.297    10  0.0118 Prepro~
+      2       0.0000128 concordance~ standard           NA 0.278    10  0.0147 Prepro~
+      3       0.0000591 concordance~ standard           NA 0.275    10  0.0152 Prepro~
+      4       0.0000959 concordance~ standard           NA 0.274    10  0.0149 Prepro~
+      5       0.000374  concordance~ standard           NA 0.256    10  0.0133 Prepro~
+
+---
+
+    Code
+      show_best(race_stc_res, metric = "concordance_survival", eval_time = 1)
+    Condition
+      Warning in `show_best()`:
+      An evaluation time is only required when a dynamic metric is selected (and `eval_time` will thus be ignored).
+    Output
+      # A tibble: 5 x 8
+        cost_complexity .metric      .estimator .eval_time  mean     n std_err .config
+                  <dbl> <chr>        <chr>           <dbl> <dbl> <int>   <dbl> <chr>  
+      1       0.0706    concordance~ standard           NA 0.297    10  0.0118 Prepro~
+      2       0.0000128 concordance~ standard           NA 0.278    10  0.0147 Prepro~
+      3       0.0000591 concordance~ standard           NA 0.275    10  0.0152 Prepro~
+      4       0.0000959 concordance~ standard           NA 0.274    10  0.0149 Prepro~
+      5       0.000374  concordance~ standard           NA 0.256    10  0.0133 Prepro~
+
+---
+
+    Code
+      show_best(race_stc_res, metric = "brier_survival_integrated")
+    Condition
+      Warning:
+      Metric "concordance_survival" was used to evaluate model candidates in the race but "brier_survival_integrated" has been chosen to rank the candidates. These results may not agree with the race.
+      Error in `show_best()`:
+      ! "brier_survival_integrated" was not in the metric set. Please choose from: "concordance_survival" and "brier_survival".
+
+# show_best with censored data - dyn metric (+stc) - W/L racing
+
+    Code
+      show_best(race_dyn_res)
+    Condition
+      Warning in `show_best()`:
+      No value of `metric` was given; "brier_survival" will be used.
+    Output
+      # A tibble: 5 x 8
+        cost_complexity .metric     .estimator .eval_time   mean     n std_err .config
+                  <dbl> <chr>       <chr>           <dbl>  <dbl> <int>   <dbl> <chr>  
+      1       0.0000591 brier_surv~ standard          100 0.0110    10 0.00314 Prepro~
+      2       0.0000959 brier_surv~ standard          100 0.0110    10 0.00317 Prepro~
+      3       0.0000128 brier_surv~ standard          100 0.0114    10 0.00312 Prepro~
+      4       0.000374  brier_surv~ standard          100 0.0114    10 0.00340 Prepro~
+      5       0.000822  brier_surv~ standard          100 0.0124    10 0.00319 Prepro~
+
+---
+
+    Code
+      show_best(race_dyn_res, metric = "concordance_survival")
+    Condition
+      Warning:
+      Metric "brier_survival" was used to evaluate model candidates in the race but "concordance_survival" has been chosen to rank the candidates. These results may not agree with the race.
+    Output
+      # A tibble: 5 x 8
+        cost_complexity .metric      .estimator .eval_time  mean     n std_err .config
+                  <dbl> <chr>        <chr>           <dbl> <dbl> <int>   <dbl> <chr>  
+      1       0.0000128 concordance~ standard           NA 0.278    10  0.0147 Prepro~
+      2       0.0000591 concordance~ standard           NA 0.275    10  0.0152 Prepro~
+      3       0.0000959 concordance~ standard           NA 0.274    10  0.0149 Prepro~
+      4       0.000374  concordance~ standard           NA 0.256    10  0.0133 Prepro~
+      5       0.000822  concordance~ standard           NA 0.238    10  0.0182 Prepro~
+
+---
+
+    Code
+      show_best(race_dyn_res, metric = "brier_survival", eval_time = 1)
+    Condition
+      Error in `show_best()`:
+      ! Evaluation time 1 is not in the results.
+
+---
+
+    Code
+      show_best(race_dyn_res, metric = "brier_survival_integrated")
+    Condition
+      Warning:
+      Metric "brier_survival" was used to evaluate model candidates in the race but "brier_survival_integrated" has been chosen to rank the candidates. These results may not agree with the race.
+      Error in `show_best()`:
+      ! "brier_survival_integrated" was not in the metric set. Please choose from: "brier_survival" and "concordance_survival".
+
diff --git a/tests/testthat/_snaps/survival-tune_race_anova.md b/tests/testthat/_snaps/survival-tune_race_anova.md
@@ -40,11 +40,11 @@
       # A tibble: 5 x 8
         cost_complexity .metric      .estimator .eval_time  mean     n std_err .config
                   <dbl> <chr>        <chr>           <dbl> <dbl> <int>   <dbl> <chr>  
-      1          0.0841 brier_survi~ standard            1 0.201    30 0.00415 Prepro~
-      2          0.0891 brier_survi~ standard            1 0.201    30 0.00415 Prepro~
-      3          0.0944 brier_survi~ standard            1 0.201    30 0.00415 Prepro~
-      4          0.1    brier_survi~ standard            1 0.201    30 0.00415 Prepro~
-      5          0.0794 brier_survi~ standard            1 0.202    30 0.00418 Prepro~
+      1    0.0000000001 brier_survi~ standard            1 0.177    30 0.00707 Prepro~
+      2    0.0841       brier_survi~ standard            1 0.201    30 0.00415 Prepro~
+      3    0.0891       brier_survi~ standard            1 0.201    30 0.00415 Prepro~
+      4    0.0944       brier_survi~ standard            1 0.201    30 0.00415 Prepro~
+      5    0.1          brier_survi~ standard            1 0.201    30 0.00415 Prepro~
 
 ---
 
@@ -65,11 +65,11 @@
       # A tibble: 5 x 8
         cost_complexity .metric      .estimator .eval_time  mean     n std_err .config
                   <dbl> <chr>        <chr>           <dbl> <dbl> <int>   <dbl> <chr>  
-      1          0.0841 brier_survi~ standard            1 0.201    30 0.00415 Prepro~
-      2          0.0891 brier_survi~ standard            1 0.201    30 0.00415 Prepro~
-      3          0.0944 brier_survi~ standard            1 0.201    30 0.00415 Prepro~
-      4          0.1    brier_survi~ standard            1 0.201    30 0.00415 Prepro~
-      5          0.0794 brier_survi~ standard            1 0.202    30 0.00418 Prepro~
+      1    0.0000000001 brier_survi~ standard            1 0.177    30 0.00707 Prepro~
+      2    0.0841       brier_survi~ standard            1 0.201    30 0.00415 Prepro~
+      3    0.0891       brier_survi~ standard            1 0.201    30 0.00415 Prepro~
+      4    0.0944       brier_survi~ standard            1 0.201    30 0.00415 Prepro~
+      5    0.1          brier_survi~ standard            1 0.201    30 0.00415 Prepro~
 
 ---
 
@@ -82,9 +82,9 @@
       # A tibble: 5 x 8
         cost_complexity .metric      .estimator .eval_time  mean     n std_err .config
                   <dbl> <chr>        <chr>           <dbl> <dbl> <int>   <dbl> <chr>  
-      1          0.0794 brier_survi~ standard           NA 0.338    30 0.00487 Prepro~
-      2          0.0841 brier_survi~ standard           NA 0.338    30 0.00480 Prepro~
-      3          0.0891 brier_survi~ standard           NA 0.338    30 0.00480 Prepro~
-      4          0.0944 brier_survi~ standard           NA 0.338    30 0.00480 Prepro~
-      5          0.1    brier_survi~ standard           NA 0.338    30 0.00480 Prepro~
+      1    0.0000000001 brier_survi~ standard           NA 0.285    30 0.00426 Prepro~
+      2    0.0794       brier_survi~ standard           NA 0.338    30 0.00487 Prepro~
+      3    0.0841       brier_survi~ standard           NA 0.338    30 0.00480 Prepro~
+      4    0.0891       brier_survi~ standard           NA 0.338    30 0.00480 Prepro~
+      5    0.0944       brier_survi~ standard           NA 0.338    30 0.00480 Prepro~
 
diff --git a/tests/testthat/_snaps/survival-tune_race_anova/aov-race-plot.png b/tests/testthat/_snaps/survival-tune_race_anova/aov-race-plot.png
diff --git a/tests/testthat/_snaps/survival-tune_race_anova/dyn-aov-race-plot.png b/tests/testthat/_snaps/survival-tune_race_anova/dyn-aov-race-plot.png
diff --git a/tests/testthat/_snaps/survival-tune_race_anova/int-aov-race-plot.png b/tests/testthat/_snaps/survival-tune_race_anova/int-aov-race-plot.png
diff --git a/tests/testthat/_snaps/survival-tune_race_anova/int-aov-racing.png b/tests/testthat/_snaps/survival-tune_race_anova/int-aov-racing.png
diff --git a/tests/testthat/helper-cens-churn.R b/tests/testthat/helper-cens-churn.R
@@ -0,0 +1,33 @@
+# Builds censored-outcome fixtures from mlc_churn and returns them as a list.
+make_churn_cens_objects <- function(x) {
+  suppressPackageStartupMessages(require("tidymodels"))
+  suppressPackageStartupMessages(require("censored"))
+  # NOTE(review): the `x` argument is never referenced in this body.
+  data("mlc_churn")
+  # Keep four columns; event_time is Surv(account_length, churned), churned 0/1.
+  mlc_churn <-
+    mlc_churn %>%
+    mutate(
+      churned = ifelse(churn == "yes", 1, 0),
+      event_time = survival::Surv(account_length, churned)
+    ) %>%
+    select(event_time, account_length, area_code, total_eve_calls)
+  # Seed is set once, ahead of the split and the resampling calls below.
+  set.seed(6941)
+  churn_split <- initial_split(mlc_churn)
+  churn_tr <- training(churn_split)
+  churn_te <- testing(churn_split)
+  churn_rs <- vfold_cv(churn_tr)
+  # Evaluation time points; handed back to callers as `times`.
+  eval_times <- c(50, 100, 150)
+  # Preprocessing: dummy variables for area_code, then normalize all predictors.
+  churn_rec <-
+    recipe(event_time ~ ., data = churn_tr) %>%
+    step_dummy(area_code) %>%
+    step_normalize(all_predictors())
+  # Return value: named list with split, train, test, rs, times, and rec.
+  list(split = churn_split, train = churn_tr, test = churn_te,
+       rs = churn_rs, times = eval_times, rec = churn_rec)
+
+}
+
diff --git a/tests/testthat/test-parsnip-survival-censoring-weights.R b/tests/testthat/test-parsnip-survival-censoring-weights.R
@@ -222,7 +222,7 @@ test_that("error messages in context of .censoring_weights_graf()", {
 
 test_that("error for .censoring_weights_graf.workflow()", {
   # temporarily its own test, see above
-  skip_if_not_installed("workflows", minimum_version = "1.1.3.9001")
+  skip_if_not_installed("workflows", minimum_version = "1.1.3.9007")
   expect_snapshot(error = TRUE, .censoring_weights_graf(workflows::workflow()))
 })