From 23dd8da17442354ac6e22d05bbd8ae8d72bdb982 Mon Sep 17 00:00:00 2001
From: Menna <menna.zayed@education.gov.uk>
Date: Tue, 15 Oct 2024 13:01:43 +0100
Subject: [PATCH] Create pretty table (#91)

* added suggested solution for pretty_table after testing it and adding more comments

* modified function so it doesn't just take numeric cols so it's more flexible

added tests but still need to add more to them

* changed the code to account for using the latest version of pretty_num

* Improve pretty num (#89) (#90)

* added nsmall args to pretty_num and comma_sep to allow formatting of decimals

* amended the function so it takes multiple values

* updated documentation

* fixed the issue with negative dp being passed to nsmall

* changed code for testing pretty_num

* amended the documentation for comma_sep and changed expected_error to expect_equal in test_pretty_num

* fixed the - dp args problem

added nsmall to the documentation

fixed the lintr styling and complexity errors

* added extra tests to pretty_num

* fixing formatting issues for lint testing

* adding documentation, testing and extra comments for pretty_table

an extra documentation line for pretty_num came up when running workflows

* fixing the example for pretty_table by adding export

* amended the documentation to pretty_table to remove refs to the cols just being numeric, add link to the function family and linked the pretty_num function

* improved the documentation

* changed function name
added data frame test
changed the no rows from a warning to a stop
updated documentation

* updated package version, gave details on what was changed in the news.md and added myself to ctb in the description file

* fixed the bracket mistake in description,
put nsmall in code format and updated documentation

* updated the descriptions file so that i'm no longer stealing rich's academic code
---
 DESCRIPTION                            |   5 +-
 NAMESPACE                              |   1 +
 NEWS.md                                |  10 +++
 R/pretty.R                             |  98 ++++++++++++++++++++++
 man/dfeR-package.Rd                    |   1 +
 man/pretty_filesize.Rd                 |   1 +
 man/pretty_num.Rd                      |   4 +-
 man/pretty_num_table.Rd                |  71 ++++++++++++++++
 man/pretty_time_taken.Rd               |   3 +-
 tests/testthat/test-pretty_num_table.R | 107 +++++++++++++++++++++++++
 10 files changed, 297 insertions(+), 4 deletions(-)
 create mode 100644 man/pretty_num_table.Rd
 create mode 100644 tests/testthat/test-pretty_num_table.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 90a9905..1c2c847 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: dfeR
 Title: Common DfE R tasks
-Version: 0.5.1
+Version: 0.6.0
 Authors@R: c(
     person("Cam", "Race", , "cameron.race@education.gov.uk", role = c("aut", "cre")),
     person("Laura", "Selby", , "laura.selby@education.gov.uk", role = "aut"),
@@ -9,7 +9,8 @@ Authors@R: c(
     person("Jen", "Machin", , "jen.machin@education.gov.uk", role = "ctb"),
     person("Jake", "Tufts", , "jake.tufts@education.gov.uk", role = "ctb"),
     person("Rich", "Bielby", , "richard.bielby@education.gov.uk", role = "ctb",
-           comment = c(ORCID = "0000-0001-9070-9969"))
+           comment = c(ORCID = "0000-0001-9070-9969")),
+    person("Menna", "Zayed", , "menna.zayed@education.gov.uk", role = "ctb")
   )
 Description: This package contains R functions to allow DfE analysts to
     re-use code for common analytical tasks that are undertaken across the
diff --git a/NAMESPACE b/NAMESPACE
index 7bad73c..2c1339e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,6 +16,7 @@ export(get_clean_sql)
 export(get_ons_api_data)
 export(pretty_filesize)
 export(pretty_num)
+export(pretty_num_table)
 export(pretty_time_taken)
 export(round_five_up)
 export(toggle_message)
diff --git a/NEWS.md b/NEWS.md
index 612481f..bc8c200 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,13 @@
+# dfeR 0.6.0
+
+Update pretty_num so that: 
+
+- it can take single or multiple values. 
+- it has the argument `nsmall` that allows control over the number of digits displayed after rounding. 
+
+Add pretty_num_table() which uses pretty_num() to format numbers in a readable format in data frames. 
+It has all the customization provided by pretty_num. 
+
 # dfeR 0.5.1
 
 Patch to update the get_clean_sql() function to ignore lines starting with 'USE'.
diff --git a/R/pretty.R b/R/pretty.R
index c481727..a837403 100644
--- a/R/pretty.R
+++ b/R/pretty.R
@@ -265,6 +265,7 @@ pretty_num <- function(
 
     # Add suffix and prefix, plus convert to million or billion
 
+
     # If nsmall is not given, make same value as dp
     # if dp is smaller than 0, make nsmall 0
     # if nsmall is specified, use that value
@@ -313,3 +314,100 @@ pretty_num <- function(
   # unlisting the results so that they're all on one line
   return(unlist(result))
 }
+
+#' Format a data frame with `dfeR::pretty_num()`.
+#'
+#' You can format number and character values in a data frame
+#' by passing arguments to `dfeR::pretty_num()`.
+#' Use parameters `include_columns` or `exclude_columns`
+#' to specify columns for formatting.
+#'
+#' @param data A data frame containing the columns to be formatted.
+#' @param include_columns A character vector specifying which columns to format.
+#' If `NULL` (default), all columns will be considered for formatting.
+#' @param exclude_columns A character vector specifying columns to exclude
+#' from formatting.
+#' If `NULL` (default), no columns will be excluded.
+#' If both `include_columns` and `exclude_columns` are provided
+#' , `include_columns` takes precedence.
+#' @param ... Additional arguments passed to `dfeR::pretty_num()`
+#' , such as `dp` (decimal places)
+#' for controlling the number of decimal points.
+#'
+#' @return A data frame with columns formatted using `dfeR::pretty_num()`.
+#'
+#' @details
+#' The function first checks if any columns are specified for inclusion
+#' via `include_columns`.
+#' If none are provided, it checks if columns are specified for exclusion
+#' via `exclude_columns`.
+#' If neither is specified, all columns in the data frame are formatted.
+#' @family prettying
+#' @seealso [pretty_num()]
+#' @export
+#' @examples
+#' # Example data frame
+#' df <- data.frame(
+#'   a = c(1.234, 5.678, 9.1011),
+#'   b = c(10.1112, 20.1314, 30.1516),
+#'   c = c("A", "B", "C")
+#' )
+#'
+#' # Apply formatting to all columns
+#' pretty_num_table(df, dp = 2)
+#'
+#' # Apply formatting to only selected columns
+#' pretty_num_table(df, include_columns = c("a"), dp = 2)
+#'
+#' # Apply formatting to all columns except specified ones
+#' pretty_num_table(df, exclude_columns = c("b"), dp = 2)
+#'
+#' # Apply formatting to all columns except specified ones and
+#' # provide alternative value for NAs
+#' pretty_num_table(df, alt_na = "[z]", exclude_columns = c("b"), dp = 2)
+#'
+pretty_num_table <- function(data,
+                             include_columns = NULL,
+                             exclude_columns = NULL,
+                             ...) {
+  # Check data is a data frame and throw error if not.
+  # class() can return more than one value (e.g. a matrix), so collapse
+  # the classes to keep the error as a single readable message.
+  if (!is.data.frame(data)) {
+    stop(paste0(
+      "Data has the class ", paste(class(data), collapse = "/"),
+      ", data must be a data.frame object"
+    ))
+  }
+
+  # Check if the data frame has rows - if not, stop the process
+  if (nrow(data) < 1) {
+    stop("Data frame is empty or contains no rows.")
+  }
+
+  # Determine which columns to format based on the provided parameters
+  # (include_columns first, then exclude_columns, otherwise all columns)
+  if (!is.null(include_columns)) {
+    # include_columns takes precedence over exclude_columns
+    cols_to_include <- include_columns
+  } else if (!is.null(exclude_columns)) {
+    # all columns except the ones named in exclude_columns;
+    # names in exclude_columns that are not in data are ignored
+    cols_to_include <- setdiff(
+      names(data),
+      exclude_columns
+    )
+  } else {
+    # neither argument given, so every column is formatted
+    cols_to_include <- names(data)
+  }
+
+  # Apply pretty_num() formatting to the selected columns.
+  # all_of() is strict: it errors if a name in cols_to_include is not a
+  # column of data. `...` is passed on unchanged to pretty_num().
+  data %>%
+    dplyr::mutate(dplyr::across(
+      .cols = dplyr::all_of(cols_to_include),
+      ~ pretty_num(., ...)
+    ))
+}
diff --git a/man/dfeR-package.Rd b/man/dfeR-package.Rd
index d135571..7a25dbb 100644
--- a/man/dfeR-package.Rd
+++ b/man/dfeR-package.Rd
@@ -33,6 +33,7 @@ Other contributors:
   \item Jen Machin \email{jen.machin@education.gov.uk} [contributor]
   \item Jake Tufts \email{jake.tufts@education.gov.uk} [contributor]
   \item Rich Bielby \email{richard.bielby@education.gov.uk} (\href{https://orcid.org/0000-0001-9070-9969}{ORCID}) [contributor]
+  \item Menna Zayed \email{menna.zayed@education.gov.uk} [contributor]
 }
 
 }
diff --git a/man/pretty_filesize.Rd b/man/pretty_filesize.Rd
index ea82c30..1d761a8 100644
--- a/man/pretty_filesize.Rd
+++ b/man/pretty_filesize.Rd
@@ -42,6 +42,7 @@ pretty_filesize(10^9)
 
 Other prettying: 
 \code{\link{pretty_num}()},
+\code{\link{pretty_num_table}()},
 \code{\link{pretty_time_taken}()}
 }
 \concept{prettying}
diff --git a/man/pretty_num.Rd b/man/pretty_num.Rd
index 4436e75..bbe78ed 100644
--- a/man/pretty_num.Rd
+++ b/man/pretty_num.Rd
@@ -62,6 +62,7 @@ pretty_num(564, prefix = "+/-")
 pretty_num(567812343223, gbp = TRUE, prefix = "+/-")
 pretty_num(11^9, gbp = TRUE, dp = 3)
 pretty_num(-11^8, gbp = TRUE, dp = -1)
+pretty_num(43.3, dp = 1, nsmall = 2)
 pretty_num("56.089", suffix = "\%")
 pretty_num("x")
 pretty_num("x", ignore_na = TRUE)
@@ -73,7 +74,7 @@ pretty_num(vector)
 pretty_num(vector, prefix = "+/-", gbp = TRUE)
 
 # Return original values if NA
-pretty_num(vector,ignore_na = TRUE)
+pretty_num(vector, ignore_na = TRUE)
 
 # Return alternative value in place of NA
 pretty_num(vector, alt_na = "z")
@@ -83,6 +84,7 @@ pretty_num(vector, alt_na = "z")
 
 Other prettying: 
 \code{\link{pretty_filesize}()},
+\code{\link{pretty_num_table}()},
 \code{\link{pretty_time_taken}()}
 }
 \concept{prettying}
diff --git a/man/pretty_num_table.Rd b/man/pretty_num_table.Rd
new file mode 100644
index 0000000..4dbb009
--- /dev/null
+++ b/man/pretty_num_table.Rd
@@ -0,0 +1,71 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pretty.R
+\name{pretty_num_table}
+\alias{pretty_num_table}
+\title{Format a data frame with \code{dfeR::pretty_num()}.}
+\usage{
+pretty_num_table(data, include_columns = NULL, exclude_columns = NULL, ...)
+}
+\arguments{
+\item{data}{A data frame containing the columns to be formatted.}
+
+\item{include_columns}{A character vector specifying which columns to format.
+If \code{NULL} (default), all columns will be considered for formatting.}
+
+\item{exclude_columns}{A character vector specifying columns to exclude
+from formatting.
+If \code{NULL} (default), no columns will be excluded.
+If both \code{include_columns} and \code{exclude_columns} are provided
+, \code{include_columns} takes precedence.}
+
+\item{...}{Additional arguments passed to \code{dfeR::pretty_num()}
+, such as \code{dp} (decimal places)
+for controlling the number of decimal points.}
+}
+\value{
+A data frame with columns formatted using \code{dfeR::pretty_num()}.
+}
+\description{
+You can format number and character values in a data frame
+by passing arguments to \code{dfeR::pretty_num()}.
+Use parameters \code{include_columns} or \code{exclude_columns}
+to specify columns for formatting.
+}
+\details{
+The function first checks if any columns are specified for inclusion
+via \code{include_columns}.
+If none are provided, it checks if columns are specified for exclusion
+via \code{exclude_columns}.
+If neither is specified, all columns in the data frame are formatted.
+}
+\examples{
+# Example data frame
+df <- data.frame(
+  a = c(1.234, 5.678, 9.1011),
+  b = c(10.1112, 20.1314, 30.1516),
+  c = c("A", "B", "C")
+)
+
+# Apply formatting to all columns
+pretty_num_table(df, dp = 2)
+
+# Apply formatting to only selected columns
+pretty_num_table(df, include_columns = c("a"), dp = 2)
+
+# Apply formatting to all columns except specified ones
+pretty_num_table(df, exclude_columns = c("b"), dp = 2)
+
+# Apply formatting to all columns except specified ones and
+# provide alternative value for NAs
+pretty_num_table(df, alt_na = "[z]", exclude_columns = c("b"), dp = 2)
+
+}
+\seealso{
+\code{\link[=pretty_num]{pretty_num()}}
+
+Other prettying: 
+\code{\link{pretty_filesize}()},
+\code{\link{pretty_num}()},
+\code{\link{pretty_time_taken}()}
+}
+\concept{prettying}
diff --git a/man/pretty_time_taken.Rd b/man/pretty_time_taken.Rd
index c6321fd..0bee42b 100644
--- a/man/pretty_time_taken.Rd
+++ b/man/pretty_time_taken.Rd
@@ -44,6 +44,7 @@ pretty_time_taken(start, end)
 
 Other prettying: 
 \code{\link{pretty_filesize}()},
-\code{\link{pretty_num}()}
+\code{\link{pretty_num}()},
+\code{\link{pretty_num_table}()}
 }
 \concept{prettying}
diff --git a/tests/testthat/test-pretty_num_table.R b/tests/testthat/test-pretty_num_table.R
new file mode 100644
index 0000000..3c89f3e
--- /dev/null
+++ b/tests/testthat/test-pretty_num_table.R
@@ -0,0 +1,107 @@
+# testing pretty_num_table
+# create data frame for testing
+df <- data.frame(
+  a = c(2.589, -5.8937, "c"),
+  b = c(11.19875, 45.6894, -78.4985),
+  c = c("X", "Y", "Z")
+)
+
+# checks formatted output for default arguments, column selection and NAs
+test_that("prettifies tables", {
+  expect_equal(pretty_num_table(df), data.frame(
+    a = c("2.59", "-5.89", as.double(NA)),
+    b = c("11.20", "45.69", "-78.50"),
+    c = c(as.double(NA), as.double(NA), as.double(NA))
+  ))
+  # gbp = TRUE adds a pound sign; excluded column "c" is left unchanged
+  expect_equal(
+    pretty_num_table(df, gbp = TRUE, exclude_columns = "c"),
+    data.frame(
+      a = c("£2.59", "-£5.89", as.double(NA)),
+      b = c("£11.20", "£45.69", "-£78.50"),
+      c = c("X", "Y", "Z")
+    )
+  )
+
+  # suffix, dp and nsmall; excluded columns "b" and "c" are left unchanged
+  expect_equal(
+    pretty_num_table(df,
+      suffix = "%", dp = 1, nsmall = 2,
+      exclude_columns = c("b", "c")
+    ),
+    data.frame(
+      a = c("2.60%", "-5.90%", as.double(NA)),
+      b = c(11.19875, 45.6894, -78.4985),
+      c = c("X", "Y", "Z")
+    )
+  )
+  # negative dp rounds to the nearest ten; alt_na replaces the NA in "a"
+  expect_equal(
+    pretty_num_table(df,
+      alt_na = "[z]", dp = -1,
+      include_columns = c("a", "b")
+    ),
+    data.frame(
+      a = c("0", "-10", "[z]"),
+      b = c("10", "50", "-80"),
+      c = c("X", "Y", "Z")
+    )
+  )
+  # prefix "+/-" and suffix on "a" only; alt_na = "" gives an empty string
+  expect_equal(
+    pretty_num_table(df,
+      alt_na = "", dp = 2,
+      prefix = "+/-", suffix = "g", include_columns = "a"
+    ),
+    data.frame(
+      a = c("+2.59g", "-5.89g", ""),
+      b = c(11.19875, 45.6894, -78.4985),
+      c = c("X", "Y", "Z")
+    )
+  )
+
+  # include_columns takes precedence when exclude_columns is also given
+  expect_equal(
+    pretty_num_table(df,
+      dp = 2,
+      include_columns = "a", exclude_columns = "b"
+    ),
+    data.frame(
+      a = c("2.59", "-5.89", as.double(NA)),
+      b = c(11.19875, 45.6894, -78.4985),
+      c = c("X", "Y", "Z")
+    )
+  )
+})
+
+# test empty data frame
+# (a data frame with zero rows should stop with an error)
+# create empty data frame for testing; this overwrites the `df` used above
+df <- data.frame(
+  a = character(),
+  b = character(),
+  c = character()
+)
+
+test_that("pretty_num_table with empty data frames", {
+  expect_error(pretty_num_table(df), "Data frame is empty or contains no rows.")
+})
+
+# test non data frame objects
+# each input should error with a message naming the class of the input
+test_that("test non data frames", {
+  expect_error(
+    pretty_num_table(1.12),
+    "Data has the class numeric, data must be a data.frame object"
+  )
+  # single character value
+  expect_error(
+    pretty_num_table("a"),
+    "Data has the class character, data must be a data.frame object"
+  )
+  # mixed vector: c() coerces the number to character
+  expect_error(
+    pretty_num_table(c("a", 1.2)),
+    "Data has the class character, data must be a data.frame object"
+  )
+})