Skip to content

Commit

Permalink
Modified merge_results filter flag to allow for selection of distinct…
Browse files Browse the repository at this point in the history
… or matched records only
  • Loading branch information
gavieira committed Jan 16, 2024
1 parent 663c510 commit 7869cfa
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 15 deletions.
25 changes: 19 additions & 6 deletions R/05-biblioverlap.R
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ biblioverlap <- function(db_list, matching_fields = default_matching_fields, n_t
#' Merge biblioverlap's results into a single dataframe
#'
#' @param db_list - list of matched dataframes (with UUID column added by [`biblioverlap`])
#' @param filter_distinct - boolean value determining whether to return only the subset containing distinct records (TRUE) or to keep overlapping records between datasets (FALSE). Default: FALSE
#' @param filter - value determining whether to return all the data ('none'), distinct records only ('distinct') or matched records only ('matched'). Default: 'none'
#'
#' @return a single dataframe containing data from db_list, featuring an additional 'SET_NAME' column to indicate from which dataset each record came
#' @importFrom rlang .data
Expand All @@ -248,28 +248,41 @@ biblioverlap <- function(db_list, matching_fields = default_matching_fields, n_t
#' #Running document-level matching procedure for two datasets
#' biblioverlap_results <- biblioverlap(ufrj_bio_0122[1:2])
#'
#' #Obtaining the results as a single dataframe (including overlapping records)
#' #Obtaining the results as a single dataframe (all records)
#' all_data <- merge_results(biblioverlap_results$db_list)
#'
#' #Checking the total number of rows and how many overlapping documents are in the dataframe
#' nrow(all_data)
#' sum(duplicated(all_data$UUID))
#'
#' #Obtaining only unique records as a single dataframe
#' distinct_data <- merge_results(biblioverlap_results$db_list, filter_distinct = FALSE)
#' #Obtaining only distinct records as a single dataframe
#' distinct_data <- merge_results(biblioverlap_results$db_list, filter = 'distinct')
#'
#' #Checking the total number of rows and how many overlapping documents are in the dataframe
#' nrow(distinct_data)
#' sum(duplicated(distinct_data$UUID))
#'
merge_results <- function(db_list, filter_distinct = FALSE){
#' #Obtaining only matched records as a single dataframe
#' matched_data <- merge_results(biblioverlap_results$db_list, filter = 'matched')
#'
#' #Checking the total number of rows and how many overlapping documents are in the dataframe
#' nrow(matched_data)
#' sum(duplicated(matched_data$UUID))
#'
merge_results <- function(db_list, filter = 'none') {
  # Validate the filter choice up front: anything other than 'none', 'distinct'
  # or 'matched' (e.g. a typo, or a TRUE/FALSE left over from the former
  # 'filter_distinct' flag) raises an error instead of silently returning
  # unfiltered data. A NULL filter falls back to 'none'.
  filter <- match.arg(filter, c('none', 'distinct', 'matched'))

  # Coerce every column of every dataset to character before binding
  # (presumably so bind_rows does not fail on columns whose types differ
  # between datasets -- TODO confirm).
  db_list <- lapply(db_list, function(df) dplyr::mutate_all(df, as.character))

  # Join all info in a single table, adding a SET_NAME column holding the name
  # of the list element (dataset) each record comes from.
  df <- dplyr::bind_rows(db_list, .id = 'SET_NAME')

  # Move the identifying columns to the front, keeping the others in order.
  columns_to_front <- c("SET_NAME", "UUID")
  df <- df[c(columns_to_front, setdiff(names(df), columns_to_front))]

  if (filter == 'distinct') {
    # Keep only the first row found for each UUID.
    df <- dplyr::distinct(df, .data$UUID, .keep_all = TRUE)
  } else if (filter == 'matched') {
    # Keep only rows whose UUID occurs at least twice, sorted by UUID so that
    # matching records sit next to each other.
    by_uuid <- dplyr::group_by(dplyr::arrange(df, .data$UUID), .data$UUID)
    # Drop the grouping before returning so callers do not receive a grouped
    # data frame whose later dplyr operations would run per-UUID.
    df <- dplyr::ungroup(dplyr::filter(by_uuid, dplyr::n() >= 2))
  }

  df
}
2 changes: 1 addition & 1 deletion inst/biblioverApp/server.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ server <- function(input, output, session) {
}

# Reactive holding the merged results: the db_list produced by
# calculate_results() is combined by biblioverlap::merge_results(), filtered
# according to the 'filter_results' input ('none', 'distinct' or 'matched').
results_data_table <- reactive({
  biblioverlap::merge_results(
    calculate_results()$db_list,
    filter = input$filter_results
  )
})


Expand Down
12 changes: 10 additions & 2 deletions inst/biblioverApp/ui.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ custom_styles <-
tags$style(
HTML(".custom_title { color: darkcyan; font-family: Arial, sans-serif; text-align: center; font-size: 18px; font-weight: bold; }"),
HTML(".custom_button { background-color: green; color: white; margin-top: 10px; margin-bottom: 10px }"),
HTML(".custom_button_higher_margin { background-color: green; color: white; margin-top: 25px; margin-bottom: 10px }"),
HTML(".plot_container {
width: 100%; /* Set the width of the div as needed */
overflow-x: auto; /* Add a vertical scrollbar if content overflows horizontally */
Expand Down Expand Up @@ -115,8 +116,15 @@ n_sets_ui <-

# "Data" tab: a row with the results filter selector and the download button,
# followed by the 'full_table' data table output.
results_data <- tabPanel("Data",
  fluidRow(
    # The selected value is read by the server as input$filter_results.
    column(2, selectInput('filter_results', 'Filter results',
                          choices = c('Unfiltered' = 'none',
                                      'Distinct' = 'distinct',
                                      'Matched' = 'matched'),
                          selectize = FALSE,
                          selected = 'none')
    ),
    # Uses the higher-margin button class (presumably to line the button up
    # with the labelled select input beside it -- TODO confirm visually).
    column(2, downloadButton("download_data", "Download Data",
                             class = 'custom_button_higher_margin'))
  ),

  DT::dataTableOutput('full_table')
)

Expand Down
19 changes: 13 additions & 6 deletions man/merge_results.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7869cfa

Please sign in to comment.