From d4847a8bb253def9c8b727d0a5c0ca7b89ffbb32 Mon Sep 17 00:00:00 2001 From: jiaying2508 <38696658+jiaying2508@users.noreply.github.com> Date: Thu, 1 Jun 2023 11:14:14 -0400 Subject: [PATCH 1/4] Update gabowmyers.R --- R/gabowmyers.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/gabowmyers.R b/R/gabowmyers.R index eeff2e1..cb96d16 100644 --- a/R/gabowmyers.R +++ b/R/gabowmyers.R @@ -211,7 +211,7 @@ prepareGraphForGabowMyers <- function(w, chains, input_data) { #' @import tidyr #' @param w matrix of CCF values (rows = clusters, columns = samples) #' @return graph_G tibble of possible edges with columns edge, parent, child -prepareGraph <- function(w_mat) { +prepareGraph <- function(w_mat, thresh) { graph_pre <- data.frame(edge = character(), parent = character(), child = character()) for (i in seq_len(nrow(w_mat))) { graph_pre <- graph_pre %>% add_row(edge = paste("root->", i, sep = ""), parent = "root", child = as.character(i)) @@ -219,7 +219,7 @@ prepareGraph <- function(w_mat) { if (i!=j) { i_row = w_mat[i, ] j_row = w_mat[j, ] - if (all(j_row[i_row > 0] > 0)) { + if (all(j_row-i_row > -thresh)) { graph_pre <- graph_pre %>% add_row(edge = paste(j, "->", i, sep = ""), parent = as.character(j), child = as.character(i)) } } @@ -261,7 +261,7 @@ enumerateSpanningTrees <- function(graph_G, w, sum_filter_thresh=0.2) { generateAllTrees <- function(w, lineage_precedence_thresh=0.1, sum_filter_thresh=0.2) { w_mat <- estimateCCFs(w) w_mat <- assign("w_mat", w_mat, envir = .GlobalEnv) - graph_G_pre <- prepareGraph(w_mat) + graph_G_pre <- prepareGraph(w_mat, lineage_precedence_thresh) graph_G <- filterEdgesBasedOnCCFs(graph_G_pre, w_mat, thresh = lineage_precedence_thresh) graph_G <- assign("graph_G", graph_G, envir = .GlobalEnv) enumerateSpanningTreesModified(graph_G, w_mat, sum_filter_thresh = sum_filter_thresh) From d77317a9943f263dc44d62c8236307883109a889 Mon Sep 17 00:00:00 2001 From: jiaying2508 <38696658+jiaying2508@users.noreply.github.com> Date: Thu, 1 Jun 2023 11:14:28 -0400 Subject: [PATCH 2/4] Update preprocessing.R --- R/preprocessing.R | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index af5b733..5ebf26c 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2,7 +2,7 @@ #' #' @export #' @param input_file input data file; -importCSV <- function(inputFile) { +importCSV <- function(inputFile, alt_reads_thresh = 5, vaf_thresh = 0.02) { data <- read_csv(inputFile, show_col_types = FALSE) output_data <- list() @@ -36,7 +36,7 @@ importCSV <- function(inputFile) { output_data$n[output_data$n==0] <- round(mean(output_data$n)) } - output_data$tcn <- as.matrix(data[c("mutation", "sample", "tumor_integer_copy_number")] %>% pivot_wider(names_from = sample, values_from = tumor_integer_copy_number, values_fill = 0)) + output_data$tcn <- as.matrix(data[c("mutation", "sample", "tumor_integer_copy_number")] %>% pivot_wider(names_from = sample, values_from = tumor_integer_copy_number, values_fill = 2)) rownames(output_data$tcn) <- output_data$tcn[,'mutation'] output_data$tcn <- output_data$tcn[,-1, drop=FALSE] rowname = rownames(output_data$tcn) @@ -45,7 +45,7 @@ importCSV <- function(inputFile) { rownames(output_data$tcn) = rowname colnames(output_data$tcn) = colname - output_data$purity <- as.matrix(data[c("mutation", "sample", "purity")] %>% pivot_wider(names_from = sample, values_from = purity)) + output_data$purity <- as.matrix(data[c("mutation", "sample", "purity")] %>% pivot_wider(names_from = sample, values_from = purity, values_fill = 0)) rownames(output_data$purity) <- output_data$purity[,'mutation'] output_data$purity <- output_data$purity[,-1, drop=FALSE] rowname = rownames(output_data$purity) @@ -53,21 +53,22 @@ importCSV <- function(inputFile) { output_data$purity <- matrix(as.numeric(output_data$purity), ncol = ncol(output_data$purity)) rownames(output_data$purity) = rowname colnames(output_data$purity) = colname - if (ncol(output_data$purity) == 1) { - if (length(unique(output_data$purity[,])) != 1) { - warning("purity not consistent for the same sample; taking the mean purity") - output_data$purity <- round(colMeans(output_data$purity), digit=2) - } else { - output_data$purity <- unique(output_data$purity[,]) - } - } else { - if (nrow(unique(output_data$purity[,])) != 1) { - warning("Purity not consistent for the same sample; taking the mean purity") - output_data$purity <- round(colMeans(output_data$purity), digit=2) - } else { - output_data$purity <- unique(output_data$purity[,])[1,] - } - } + output_data$purity = colSums(output_data$purity) / colSums(!!output_data$purity) + # if (ncol(output_data$purity) == 1) { + # if (length(unique(output_data$purity[,])) != 1) { + # warning("purity not consistent for the same sample; taking the mean purity") + # output_data$purity <- round(colMeans(output_data$purity), digit=2) + # } else { + # output_data$purity <- unique(output_data$purity[,]) + # } + # } else { + # if (nrow(unique(output_data$purity[,])) != 1) { + # warning("Purity not consistent for the same sample; taking the mean purity") + # output_data$purity <- round(colMeans(output_data$purity), digit=2) + # } else { + # output_data$purity <- unique(output_data$purity[,])[1,] + # } + # } output_data$S = ncol(output_data$y) output_data$I = nrow(output_data$y) @@ -88,6 +89,8 @@ importCSV <- function(inputFile) { output_data$m <- estimateMultiplicityMatrix(output_data) } + output_data$y[output_data$y / output_data$n < vaf_thresh] = 0 + output_data$y[output_data$y <= alt_reads_thresh] = 0 return(output_data) } From 07a3acba2b9228dec2276b8bf88992526ed7910e Mon Sep 17 00:00:00 2001 From: jiaying2508 <38696658+jiaying2508@users.noreply.github.com> Date: Mon, 5 Jun 2023 11:37:53 -0400 Subject: [PATCH 3/4] Update preprocessing.R change equal sign --- R/preprocessing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 5ebf26c..27b2419 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2,7 +2,7 @@ #' #' @export #' @param input_file input data file; -importCSV <- function(inputFile, alt_reads_thresh = 5, vaf_thresh = 0.02) { +importCSV <- function(inputFile, alt_reads_thresh = 6, vaf_thresh = 0.02) { data <- read_csv(inputFile, show_col_types = FALSE) output_data <- list() @@ -90,7 +90,7 @@ importCSV <- function(inputFile, alt_reads_thresh = 5, vaf_thresh = 0.02) { } output_data$y[output_data$y / output_data$n < vaf_thresh] = 0 - output_data$y[output_data$y <= alt_reads_thresh] = 0 + output_data$y[output_data$y < alt_reads_thresh] = 0 return(output_data) } From d1b89362597826dd46994574bc3670ce4bd47642 Mon Sep 17 00:00:00 2001 From: jiaying2508 <38696658+jiaying2508@users.noreply.github.com> Date: Mon, 5 Jun 2023 11:38:14 -0400 Subject: [PATCH 4/4] Update pictograph.Rmd add input read threshold to remove potential false positives --- vignettes/pictograph.Rmd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vignettes/pictograph.Rmd b/vignettes/pictograph.Rmd index 006749c..34bbe5d 100644 --- a/vignettes/pictograph.Rmd +++ b/vignettes/pictograph.Rmd @@ -31,10 +31,11 @@ The required user input is a csv file that contains at least columns named "samp head(read.csv(system.file('extdata/example_input.csv', package = 'pictograph'))) ``` -The input file can be read using the importCSV function. Alt read count (y), total read count (n), tumor copy number (tcn), and multiplicity (m) are stored in matrices where the columns are samples, and rows are variants. Purity is supplied as a vector. I and S are integers representing the number of variants and number of samples, respectively. +The input file can be read using the importCSV function. Alt read count (y), total read count (n), tumor copy number (tcn), and multiplicity (m) are stored in matrices where the columns are samples, and rows are variants. Purity is supplied as a vector. I and S are integers representing the number of variants and number of samples, respectively. The function will also pre-process the data to remove potential false positive read counts. By default, a mutation with alt reads less than 6 or a VAF less than 2% is considered a false positive. Users can change the thresholds using the `alt_reads_thresh` and `vaf_thresh` parameters. ```{r input_data} -input_data <- importCSV(system.file('extdata/example_input.csv', package = 'pictograph')) +input_data <- importCSV(system.file('extdata/example_input.csv', package = 'pictograph'), + alt_reads_thresh = 6, vaf_thresh = 0.02) ``` ```{r load_results, include = F}