From e0e499db8e89727f42925316ce43216aaed37b3f Mon Sep 17 00:00:00 2001
From: Ian D Buller <ian.d.buller@gmail.com>
Date: Mon, 12 Jun 2023 08:45:31 -0400
Subject: [PATCH] :bug: Update to the False Discovery Rate Calculation

* now orders the p-values in ascending order instead of in descending order
---
 README.md        |  7 +++++--
 code/functions.R | 19 +++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/README.md b/README.md
index 186d30b..8229661 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ Geographic Patterns in U.S. Lung Cancer Mortality and Cigarette Smoking <img src
 [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
 ![GitHub last commit](https://img.shields.io/github/last-commit/idblr/geo_US_lung_cancer_and_smoking)
 
-**Date repository last updated**: February 06, 2023
+**Date repository last updated**: June 10, 2023
 
 ### Authors
 
@@ -61,7 +61,10 @@ Lung cancer is the leading cause of cancer death in the United States (US) and v
 <td>Manuscript accepted by <a href="https://cebp.aacrjournals.org/">Cancer Epidemiology, Biomarkers & Prevention</a></td>
 </tr>
 <td><p align="center">February 2023</p></td>
-<td>Manuscript published in <a href="https://doi.org/10.1158/1055-9965.EPI-22-0253">Cancer Epidemiology, Biomarkers & Prevention</td>
+<td>Manuscript published in <a href="https://doi.org/10.1158/1055-9965.EPI-22-0253">Cancer Epidemiology, Biomarkers & Prevention</a></td>
+</tr>
+<td><p align="center">June 2023</p></td>
+<td>Update to the False Discovery Rate <a href="https://doi.org/10.1111/j.2517-6161.1995.tb02031.x">(Benjamini & Hochberg, 1995)</a> calculation for multiple testing correction that now orders the p-values in ascending order instead of in descending order.</td>
 </tr>
 </tbody>
 <table>
diff --git a/code/functions.R b/code/functions.R
index 4ca7900..1ed6121 100644
--- a/code/functions.R
+++ b/code/functions.R
@@ -6,12 +6,13 @@
 # Created on: January 15, 2022
 #
 # Most recently modified by: @idblr
-# Most recently modified on: May 17, 2022
+# Most recently modified on: June 5, 2023
 #
 # Notes:
 # A) Code for essential functions to calculate the Lee's L statistic and False Discovery Rate
-# B) 05/17/2022: Consistent variable name based on merged data object; rename NAs as "Suppressed" in LeeL() function
-# C) 05/17/2022: Correctly set "dat" in LeeL() function
+# B) 05/17/2022 (@idblr): Consistent variable name based on merged data object; rename NAs as "Suppressed" in LeeL() function
+# C) 05/17/2022 (@idblr): Correctly set "dat" in LeeL() function
+# D) 06/05/2023 (@idblr): Update to FDR calculation for multiple testing correction
 # ----------------------------------------------------------------------- #
 
 cat("\nLoading custom functions for Lee's L statistic\n")
@@ -19,12 +20,14 @@ cat("\nLoading custom functions for Lee's L statistic\n")
 # Select not in
 `%notin%` <- Negate(`%in%`) # https://www.r-bloggers.com/the-notin-operator/
 
-# False Discovery Rate
-fdr <- function(pvals, alpha = 0.05) {
+# False Discovery Rate (Benjamini & Hochberg, 1995; DOI: 10.1111/j.2517-6161.1995.tb02031.x)
+fdr <- function(pvals, alpha) {
+  pcrit <- NULL
   m <- length(pvals)
-  for (i in 1:length(pvals)) {
-    if (pvals[i] <= (i/m) * alpha) { return(pvals[i]) } 
+  for (i in 1:m) {
+    if (pvals[i] <= (i/m) * alpha) { pcrit <- pvals[i] }
   }
+  return(max(pcrit, pvals[1]))
 }
 
 # Local Lee
@@ -114,7 +117,7 @@ LeeL <- function(dat, x, y, label, numsim = 100,...) {
   
   #### Correction for Multiple Testing
   ##### False Discovery Rate
-  sort_pvals <- sort(as.vector(pvals), decreasing = TRUE)
+  sort_pvals <- sort(as.vector(pvals))
   out_alpha <- fdr(sort_pvals, alpha)
   sig <- pvals < out_alpha