From e78cf316c86648da2af0fb8a5f84b63de6cca738 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Tue, 8 Oct 2019 22:01:55 +0200 Subject: [PATCH 01/32] feat: add .orderEdges --- R/joinGraph.R | 26 ++++++++++++++++++++++++++ tests/testthat/test_joinGraph.R | 8 ++++++++ 2 files changed, 34 insertions(+) create mode 100644 R/joinGraph.R create mode 100644 tests/testthat/test_joinGraph.R diff --git a/R/joinGraph.R b/R/joinGraph.R new file mode 100644 index 00000000..1066a3d6 --- /dev/null +++ b/R/joinGraph.R @@ -0,0 +1,26 @@ +#' @title (Re)order edges +#' +#' @description +#' Ensures matrix/list with edges is ordered increasingly and gaps are filled +#' with `NA` +#' +#' @param x `numeric`, values to be matched, e.g. m/z from spectrum 1. +#' @param y `numeric`, values to be matched, e.g. m/z from spectrum 2. +#' @param e `list`, of length two (`x`, `y`) with edges +#' +#' @return A `list` with two columns, namely `x` and `y`, +#' representing the index of the values in `x` matching the corresponding value +#' in `y` (or `NA` if the value do not match). +#' +#' @examples +#' x <- c(100.1, 100.2, 300, 500) +#' y <- c(100, 200, 299.9, 300.1, 505) +#' e <- .edgeList(x, y, tolerance = 0.2) +#' .orderEdges(x, y, e) +.orderEdges <- function(x, y, e) { + na <- is.na(e[[1L]]) + xe <- x[e[[1L]]] + xe[na] <- y[e[[2L]][na]] + o <- order(xe, method = "radix") + list(x = e[[1L]][o], y = e[[2L]][o]) +} diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R new file mode 100644 index 00000000..7841709b --- /dev/null +++ b/tests/testthat/test_joinGraph.R @@ -0,0 +1,8 @@ +test_that(".orderEdges", { + x <- c(100.1, 100.2, 300, 500) + y <- c(100, 200, 300.1) + e <- list(x = c(1, 2, 3, 4, NA), y = c(1, 1, 3, NA, 2)) + o <- list(x = c(1, 2, NA, 3, 4), y = c(1, 1, 2, 3, NA)) + + expect_equal(.orderEdges(x, y, e), o) +}) From 9c0514618eb83631edef9ac18840f5361ce01da2 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Tue, 8 Oct 2019 22:02:16 +0200 Subject: [PATCH 02/32] refactor: use .orderEdges for .joinOuter --- R/matching.R | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/R/matching.R b/R/matching.R index ca021959..19931b66 100644 --- a/R/matching.R +++ b/R/matching.R @@ -185,7 +185,7 @@ common <- function(x, table, tolerance = Inf, ppm = 0, #' @note `join` is based on `closest(x, y, tolerance, duplicates = "closest")`. #' That means for multiple matches just the closest one is reported. #' -#' @return `join` returns a `matrix` with two columns, namely `x` and `y`, +#' @return `join` returns a `list` with two columns, namely `x` and `y`, #' representing the index of the values in `x` matching the corresponding value #' in `y` (or `NA` if the value does not match). #' @@ -249,19 +249,16 @@ join <- function(x, y, tolerance = 0, ppm = 0, nx <- length(x) ny <- length(y) nlx <- length(ji[[1L]]) - xy <- xys <- c(x, y) - ## equalise values that are identified as common - if (nlx) { - xy[nx + ji[[2L]]] <- xy[ji[[1L]]] - xys <- xy[-(nx + ji[[2L]])] - } - ## find position - i <- findInterval(xy, sort.int(xys)) - ## fill gaps with NA - ox <- oy <- rep.int(NA_integer_, nx + ny - nlx) sx <- seq_len(nx) sy <- seq_len(ny) - ox[i[sx]] <- sx - oy[i[nx + sy]] <- sy - list(x = ox, y = oy) + ox <- oy <- rep.int(NA_integer_, nx + ny - nlx) + if (nlx) { + ox[sx] <- c(ji[[1L]], sx[-ji[[1L]]]) + oy[c(seq_len(nlx), nx + seq_len(ny - nlx))] <- + c(ji[[2L]], sy[-ji[[2L]]]) + } else { + ox[sx] <- sx + oy[nx + sy] <- sy + } + .orderEdges(x, y, list(x = ox, y = oy)) } From 020ccce258334d9b578c13a3c2be2fc0eb9ba413 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Tue, 8 Oct 2019 22:02:53 +0200 Subject: [PATCH 03/32] feat: add .edgeList --- R/joinGraph.R | 32 ++++++++++++++++++++++++++++++++ tests/testthat/test_joinGraph.R | 10 ++++++++++ 2 files changed, 42 insertions(+) diff --git a/R/joinGraph.R b/R/joinGraph.R index 1066a3d6..1dd923e0 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -1,3 +1,35 @@ +#' @title Create Edge List Matrix +#' +#' @description +#' This function creates a two-column matrix edge list of an undirected graph. +#' +#' @param x `numeric`, values to be matched, e.g. m/z from spectrum 1. +#' @param y `numeric`, values to be matched, e.g. m/z from spectrum 2. +#' @param tolerance `numeric`, accepted tolerance. Could be of length one or +#' the same length as `table`. +#' @param ppm `numeric(1)` representing a relative, value-specific +#' parts-per-million (PPM) tolerance that is added to `tolerance`. +#' @param na.rm `logical(1)` should rows with NA removed from the results? +#' (necessary for [`igraph::graph_from_edge_list()`]. +#' +#' @return A two-column `matrix` with the undirected edge positions, for +#' [igraph::graph_from_edge_list()`] the indices in the `y` column have to be +#' increased by `length(x)` and the `NA` values (no match) have to be removed +#' manually. +#' @examples +#' x <- c(100.1, 100.2, 300, 500) +#' y <- c(100, 200, 299.9, 300.1, 505) +#' .edgeList(x, y, tolerance = 0.2) +.edgeList <- function(x, y, tolerance = 0, ppm = 0, na.rm = FALSE) { + xy <- closest(x, y, tolerance = tolerance, ppm = ppm, duplicates = "keep") + yx <- closest(y, x, tolerance = tolerance, ppm = ppm, duplicates = "keep") + + # switching the direction of the second match (yx) to allow using duplicated + # to remove multiple edges (we use undirected graphs anyway) + e <- cbind(x = c(seq_along(x), yx), y = c(xy, seq_along(y))) + e[!duplicated(e), ] +} + #' @title (Re)order edges #' #' @description diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 7841709b..c0537ff7 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -1,3 +1,13 @@ +test_that(".edgeList", { + + x <- c(100.1, 100.2, 300, 500) + y <- c(100, 200, 300.1) + e <- matrix(c(1, 2, 3, 4, NA, 1, 1, 3, NA, 2), ncol = 2, + dimnames = list(c(), c("x", "y"))) + + expect_equal(.edgeList(x, y, tolerance = 0.2, ppm = 0), e) +}) + test_that(".orderEdges", { x <- c(100.1, 100.2, 300, 500) y <- c(100, 200, 300.1) From 0a588ac1f74f3fe8d4de2b123744c36dda43722f Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 9 Oct 2019 20:21:27 +0200 Subject: [PATCH 04/32] docs: fix join return value --- man/matching.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/matching.Rd b/man/matching.Rd index 1b07dcca..4e0c79b6 100644 --- a/man/matching.Rd +++ b/man/matching.Rd @@ -46,7 +46,7 @@ there is no match. \code{common} returns a \code{logical} vector of length \code{x} that is \code{TRUE} if the element in \code{x} was found in \code{table}. It is similar to \code{\link{\%in\%}}. -\code{join} returns a \code{matrix} with two columns, namely \code{x} and \code{y}, +\code{join} returns a \code{list} with two columns, namely \code{x} and \code{y}, representing the index of the values in \code{x} matching the corresponding value in \code{y} (or \code{NA} if the value does not match). } From 6bd7553057537194e40adce161adf7cabf887671 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 9 Oct 2019 20:23:21 +0200 Subject: [PATCH 05/32] docs: don't create man pages for internal functions --- R/joinGraph.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/joinGraph.R b/R/joinGraph.R index 1dd923e0..5b140106 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -16,6 +16,7 @@ #' [igraph::graph_from_edge_list()`] the indices in the `y` column have to be #' increased by `length(x)` and the `NA` values (no match) have to be removed #' manually. +#' @noRd #' @examples #' x <- c(100.1, 100.2, 300, 500) #' y <- c(100, 200, 299.9, 300.1, 505) @@ -44,6 +45,7 @@ #' representing the index of the values in `x` matching the corresponding value #' in `y` (or `NA` if the value do not match). #' +#' @noRd #' @examples #' x <- c(100.1, 100.2, 300, 500) #' y <- c(100, 200, 299.9, 300.1, 505) From ba7e4500ee93904315ede3541d76bbcec130b6d9 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 9 Oct 2019 20:23:56 +0200 Subject: [PATCH 06/32] perf: create list instead of matrix --- R/joinGraph.R | 15 ++++++++------- tests/testthat/test_joinGraph.R | 3 +-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index 5b140106..c898b5be 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -12,10 +12,10 @@ #' @param na.rm `logical(1)` should rows with NA removed from the results? #' (necessary for [`igraph::graph_from_edge_list()`]. #' -#' @return A two-column `matrix` with the undirected edge positions, for -#' [igraph::graph_from_edge_list()`] the indices in the `y` column have to be -#' increased by `length(x)` and the `NA` values (no match) have to be removed -#' manually. +#' @return A `list` with the undirected edge positions, for +#' [igraph::graph_from_edge_list()`] the `list` has to be `rbind`ed, the +#' indices in the `y` column have to be increased by `length(x)` and the `NA` +#' values (no match) have to be removed manually. #' @noRd #' @examples #' x <- c(100.1, 100.2, 300, 500) @@ -27,14 +27,15 @@ # switching the direction of the second match (yx) to allow using duplicated # to remove multiple edges (we use undirected graphs anyway) - e <- cbind(x = c(seq_along(x), yx), y = c(xy, seq_along(y))) - e[!duplicated(e), ] + e <- mapply(c, c(seq_along(x), yx), c(xy, seq_along(y)), SIMPLIFY = FALSE) + e <- e[!duplicated(e)] + split(unlist(e), c("x", "y")) } #' @title (Re)order edges #' #' @description -#' Ensures matrix/list with edges is ordered increasingly and gaps are filled +#' Ensures list with edges is ordered increasingly and gaps are filled #' with `NA` #' #' @param x `numeric`, values to be matched, e.g. m/z from spectrum 1. diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index c0537ff7..9379aa62 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -2,8 +2,7 @@ test_that(".edgeList", { x <- c(100.1, 100.2, 300, 500) y <- c(100, 200, 300.1) - e <- matrix(c(1, 2, 3, 4, NA, 1, 1, 3, NA, 2), ncol = 2, - dimnames = list(c(), c("x", "y"))) + e <- list(x = c(1, 2, 3, 4, NA), y = c(1, 1, 3, NA, 2)) expect_equal(.edgeList(x, y, tolerance = 0.2, ppm = 0), e) }) From 8e4a7885e0f718b9d09b4829a6cbb498b5bb8191 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 9 Oct 2019 20:38:14 +0200 Subject: [PATCH 07/32] feat: add .anyCrossing --- R/joinGraph.R | 21 +++++++++++++++++++++ tests/testthat/test_joinGraph.R | 5 +++++ 2 files changed, 26 insertions(+) diff --git a/R/joinGraph.R b/R/joinGraph.R index c898b5be..d4a7d18e 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -1,3 +1,24 @@ +#' @title Crossing Edges +#' +#' @description +#' This function tests for crossing edges. +#' +#' @param x `list` to test +#' +#' @return `logical`, `TRUE` if at least one crossing edge was found, otherwise +#' `FALSE`. +#' @noRd +#' @examples +#' .anyCrossing(list(x = 1:3, y = c(NA, 1:2))) +#' .anyCrossing(list(x = 1:3, y = c(2, 1, NA))) +.anyCrossing <- function(x) { + for (i in seq_along(x)) { + if (is.unsorted(x[[i]], na.rm = TRUE)) + return(TRUE) + } + FALSE +} + #' @title Create Edge List Matrix #' #' @description diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 9379aa62..81495d1e 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -15,3 +15,8 @@ test_that(".orderEdges", { expect_equal(.orderEdges(x, y, e), o) }) + +test_that(".anyCrossing", { + expect_false(.anyCrossing(list(x = 1:3, y = c(NA, 1:2)))) + expect_true(.anyCrossing(list(x = 1:3, y = c(2, 1, NA)))) +}) From 40534a4a15b81b071e14b509c97255cb1916c0dc Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 9 Oct 2019 21:16:30 +0200 Subject: [PATCH 08/32] feat: add .edgeGroups --- R/joinGraph.R | 21 +++++++++++++++++++++ tests/testthat/test_joinGraph.R | 10 ++++++++++ 2 files changed, 31 insertions(+) diff --git a/R/joinGraph.R b/R/joinGraph.R index d4a7d18e..6213ea57 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -19,6 +19,27 @@ FALSE } +#' @title Find Edge Groups +#' +#' @description +#' This function finds edges that belong to the same group. A group is definied +#' by at least one identical point for following edges. It assumes that the +#' edge list is ordered. +#' +#' @param e `list` with edges +#' @return `integer` group values +#' @noRd +#' @examples +#' .edgeGroups(list(x = c(1, 2, NA, 3, 4, 4, 5), y = c(1, 1, 2, 3, 3, 4, 4))) +.edgeGroups <- function(e) { + n <- lengths(e) + if (!is.list(e) || n[1L] != n[2L]) + stop("'e' has to be a list with two elements of equal length.") + gx <- e[[1L]][-1L] != e[[1L]][-n[1L]] + gy <- e[[2L]][-1L] != e[[2L]][-n[1L]] + pmin(cumsum(c(TRUE, gx | is.na(gx))), cumsum(c(TRUE, gy | is.na(gy)))) +} + #' @title Create Edge List Matrix #' #' @description diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 81495d1e..8ea06258 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -1,3 +1,13 @@ +test_that(".edgeGroups", { + e1 <- list(x = c(1, 2, NA, 3, 4, 4, 5), y = c(1, 1, 2, 3, 3, 4, 4)) + e2 <- list(x = e1$y, y = e1$x) + g <- c(1, 1, 2, 3, 3, 4, 4) + + expect_error(.edgeGroups(list(x = 1, y = 1:2)), "length") + expect_equal(.edgeGroups(e1), g) + expect_equal(.edgeGroups(e2), g) +}) + test_that(".edgeList", { x <- c(100.1, 100.2, 300, 500) From 4215ab1bf43c7c6e718ffd661211d29028fed3c1 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 9 Oct 2019 22:08:23 +0200 Subject: [PATCH 09/32] feat: add .combinations --- R/joinGraph.R | 27 +++++++++++++++++++++++++++ tests/testthat/test_joinGraph.R | 20 +++++++++++++++----- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index 6213ea57..2688a6de 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -19,6 +19,33 @@ FALSE } +#' @title Find all possible combinations +#' +#' @description +#' Similar to `expand.grid` but expects a `numeric` vector as input and returns +#' the indices. +#' +#' @param `x` `integer`, group numbers +#' @return `list`, each element represents a possible combination +#' @noRd +#' @examples +#' .combinations(c(1, 2, 2, 2, 3, 3)) +.combinations <- function(x) { + r <- rle(x) + ncs <- cumsum(c(0L, r$lengths)) + ncmb <- prod(r$lengths) + times <- 1L + l <- vector(mode = "list", length = length(r)) + + for (i in seq_along(r$lengths)) { + n <- r$lengths[i] + ncmb <- ncmb / n + l[[i]] <- rep.int(rep.int(ncs[i] + seq_len(n), rep.int(times, n)), ncmb) + times <- times * n + } + split(unlist(l), seq_along(l[[1L]])) +} + #' @title Find Edge Groups #' #' @description diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 8ea06258..adcd64ea 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -1,3 +1,18 @@ +test_that(".combinations", { + g <- c(1, 2, 2, 2, 3, 3) + l <- list( + "1" = c(1, 2, 5), "2" = c(1, 3, 5), "3" = c(1, 4, 5), + "4" = c(1, 2, 6), "5" = c(1, 3, 6), "6" = c(1, 4, 6) + ) + + expect_equal(.combinations(g), l) +}) + +test_that(".anyCrossing", { + expect_false(.anyCrossing(list(x = 1:3, y = c(NA, 1:2)))) + expect_true(.anyCrossing(list(x = 1:3, y = c(2, 1, NA)))) +}) + test_that(".edgeGroups", { e1 <- list(x = c(1, 2, NA, 3, 4, 4, 5), y = c(1, 1, 2, 3, 3, 4, 4)) e2 <- list(x = e1$y, y = e1$x) @@ -25,8 +40,3 @@ test_that(".orderEdges", { expect_equal(.orderEdges(x, y, e), o) }) - -test_that(".anyCrossing", { - expect_false(.anyCrossing(list(x = 1:3, y = c(NA, 1:2)))) - expect_true(.anyCrossing(list(x = 1:3, y = c(2, 1, NA)))) -}) From 4205cefed43dbd1e02187c3c6ed667c11da46ddc Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Thu, 10 Oct 2019 12:24:06 +0200 Subject: [PATCH 10/32] feat: add .transposeList --- R/joinGraph.R | 24 ++++++++++++++++++++++-- tests/testthat/test_joinGraph.R | 12 ++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index 2688a6de..a07b2272 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -43,7 +43,7 @@ l[[i]] <- rep.int(rep.int(ncs[i] + seq_len(n), rep.int(times, n)), ncmb) times <- times * n } - split(unlist(l), seq_along(l[[1L]])) + .transposeList(l) } #' @title Find Edge Groups @@ -98,7 +98,7 @@ # to remove multiple edges (we use undirected graphs anyway) e <- mapply(c, c(seq_along(x), yx), c(xy, seq_along(y)), SIMPLIFY = FALSE) e <- e[!duplicated(e)] - split(unlist(e), c("x", "y")) + setNames(.transposeList(e), c("x", "y")) } #' @title (Re)order edges @@ -128,3 +128,23 @@ o <- order(xe, method = "radix") list(x = e[[1L]][o], y = e[[2L]][o]) } + +#' @title Transpose List +#' +#' @description +#' Transpose a `n * m` `list` into an `m * n` one. +#' +#' @param x `list` +#' @return `list` +#' @noRd +#' @examples +#' .transposeList(list(a = 1:10, b = 11:20, c = 21:30)) +.transposeList <- function(x) { + n <- unique(lengths(x)) + + if (!is.list(x) || length(n) != 1L) + stop("'e' has to be a list with elements of equal length.") + l <- split(unlist(x, use.names = FALSE), seq_len(n)) + names(l) <- NULL + l +} diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index adcd64ea..8246f425 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -1,8 +1,8 @@ test_that(".combinations", { g <- c(1, 2, 2, 2, 3, 3) l <- list( - "1" = c(1, 2, 5), "2" = c(1, 3, 5), "3" = c(1, 4, 5), - "4" = c(1, 2, 6), "5" = c(1, 3, 6), "6" = c(1, 4, 6) + c(1, 2, 5), c(1, 3, 5), c(1, 4, 5), + c(1, 2, 6), c(1, 3, 6), c(1, 4, 6) ) expect_equal(.combinations(g), l) @@ -40,3 +40,11 @@ test_that(".orderEdges", { expect_equal(.orderEdges(x, y, e), o) }) + +test_that(".transposeList", { + l <- list(a = 1:10, b = 11:20, c = 21:30) + r <- mapply(c, 1:10, 11:20, 21:30, SIMPLIFY = FALSE) + + expect_error(.transposeList(list(a = 1:3, b = 1:10)), "length") + expect_equal(.transposeList(l), r) +}) From 066be789d72b98eb2d9920e90545562bf543b371 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Mon, 14 Oct 2019 13:12:07 +0200 Subject: [PATCH 11/32] tests: add joinGraph unit test --- tests/testthat/test_joinGraph.R | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 8246f425..c20fbe13 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -1,3 +1,21 @@ +test_that("joinGraph", { + x <- matrix( + c(100.001, 100.002, 300.01, 300.02, 1, 9, 1, 9), + ncol = 2L, dimnames = list(c(), c("mz", "intensity")) + ) + y <- matrix( + c(100.0, 200.0, 300.002, 300.025, 300.0255, 9, 1, 1, 9, 1), + ncol = 2L, dimnames = list(c(), c("mz", "intensity")) + ) + l <- list(x = c(NA, 2, NA, 4), y = c(1, NA, NA, 4, NA)) + expect_equal(joinGraph(x, y), l) +}) + +test_that(".anyCrossing", { + expect_false(.anyCrossing(list(x = 1:3, y = c(NA, 1:2)))) + expect_true(.anyCrossing(list(x = 1:3, y = c(2, 1, NA)))) +}) + test_that(".combinations", { g <- c(1, 2, 2, 2, 3, 3) l <- list( @@ -8,11 +26,6 @@ test_that(".combinations", { expect_equal(.combinations(g), l) }) -test_that(".anyCrossing", { - expect_false(.anyCrossing(list(x = 1:3, y = c(NA, 1:2)))) - expect_true(.anyCrossing(list(x = 1:3, y = c(2, 1, NA)))) -}) - test_that(".edgeGroups", { e1 <- list(x = c(1, 2, NA, 3, 4, 4, 5), y = c(1, 1, 2, 3, 3, 4, 4)) e2 <- list(x = e1$y, y = e1$x) From 341b6adf977fcbba18127350cc84300a9927005a Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 16 Oct 2019 20:57:49 +0200 Subject: [PATCH 12/32] fix: remove useless na.rm argument --- R/joinGraph.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index a07b2272..9bd1c9e3 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -90,7 +90,7 @@ #' x <- c(100.1, 100.2, 300, 500) #' y <- c(100, 200, 299.9, 300.1, 505) #' .edgeList(x, y, tolerance = 0.2) -.edgeList <- function(x, y, tolerance = 0, ppm = 0, na.rm = FALSE) { +.edgeList <- function(x, y, tolerance = 0, ppm = 0) { xy <- closest(x, y, tolerance = tolerance, ppm = ppm, duplicates = "keep") yx <- closest(y, x, tolerance = tolerance, ppm = ppm, duplicates = "keep") From f0d288e42d33254c0e2041647ed94199f2bbea4a Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 16 Oct 2019 20:59:28 +0200 Subject: [PATCH 13/32] fix: correctly handle mutliple NA --- R/joinGraph.R | 12 +++++++++--- tests/testthat/test_joinGraph.R | 8 ++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index 9bd1c9e3..5707afa7 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -62,9 +62,15 @@ n <- lengths(e) if (!is.list(e) || n[1L] != n[2L]) stop("'e' has to be a list with two elements of equal length.") - gx <- e[[1L]][-1L] != e[[1L]][-n[1L]] - gy <- e[[2L]][-1L] != e[[2L]][-n[1L]] - pmin(cumsum(c(TRUE, gx | is.na(gx))), cumsum(c(TRUE, gy | is.na(gy)))) + + ## na.rm = FALSE is important here. Otherwise duplicated indices that arn't + ## groups could occur. + g <- pmin(e[[1L]], e[[2L]], na.rm = FALSE) + + ## that's more or less the same as rle but ignores values. + ## So it takes less memory and is slightly faster + g <- g[-1L] != g[-n[1L]] + cumsum(c(TRUE, g | is.na(g))) } #' @title Create Edge List Matrix diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index c20fbe13..e1e06d6b 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -34,6 +34,14 @@ test_that(".edgeGroups", { expect_error(.edgeGroups(list(x = 1, y = 1:2)), "length") expect_equal(.edgeGroups(e1), g) expect_equal(.edgeGroups(e2), g) + + e3 <- list(x = c(1, 2, NA, NA, 3, 4, 4), y = c(1, 1, 2, 3, NA, 4, 5)) + g <- c(1, 1, 2, 3, 4, 5, 5) + expect_equal(.edgeGroups(e3), g) + + e4 <- list(x = c(1, 2, NA, NA, 3, 3, 3), y = c(1, 1, 2, 3, 4, 5, 6)) + g <- c(1, 1, 2, 3, 4, 4, 4) + expect_equal(.edgeGroups(e4), g) }) test_that(".edgeList", { From 517e865f9e404f77482095a548f1b7932bf2b751 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 16 Oct 2019 21:50:59 +0200 Subject: [PATCH 14/32] refactor: use vectors as input for .anyCrossing --- R/joinGraph.R | 15 ++++++--------- tests/testthat/test_joinGraph.R | 4 ++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index 5707afa7..aa2367ab 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -3,20 +3,17 @@ #' @description #' This function tests for crossing edges. #' -#' @param x `list` to test +#' @param x `numeric` +#' @param y `numeric` #' #' @return `logical`, `TRUE` if at least one crossing edge was found, otherwise #' `FALSE`. #' @noRd #' @examples -#' .anyCrossing(list(x = 1:3, y = c(NA, 1:2))) -#' .anyCrossing(list(x = 1:3, y = c(2, 1, NA))) -.anyCrossing <- function(x) { - for (i in seq_along(x)) { - if (is.unsorted(x[[i]], na.rm = TRUE)) - return(TRUE) - } - FALSE +#' .anyCrossing(x = 1:3, y = c(NA, 1:2)) +#' .anyCrossing(x = 1:3, y = c(2, 1, NA)) +.anyCrossing <- function(x, y) { + is.unsorted(x, na.rm = TRUE) || is.unsorted(y, na.rm = TRUE) } #' @title Find all possible combinations diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index e1e06d6b..e0622b77 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -12,8 +12,8 @@ test_that("joinGraph", { }) test_that(".anyCrossing", { - expect_false(.anyCrossing(list(x = 1:3, y = c(NA, 1:2)))) - expect_true(.anyCrossing(list(x = 1:3, y = c(2, 1, NA)))) + expect_false(.anyCrossing(x = 1:3, y = c(NA, 1:2))) + expect_true(.anyCrossing(x = 1:3, y = c(2, 1, NA))) }) test_that(".combinations", { From 0bd04da3eb68fc5a6a648c373cc0e7722aa348d1 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Thu, 24 Oct 2019 23:12:51 +0200 Subject: [PATCH 15/32] feat: add is{Precursor,Follower}Identical --- R/joinGraph.R | 22 ++++++++++++++++++++++ tests/testthat/test_joinGraph.R | 12 ++++++++++++ 2 files changed, 34 insertions(+) diff --git a/R/joinGraph.R b/R/joinGraph.R index aa2367ab..c9a30fa0 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -104,6 +104,28 @@ setNames(.transposeList(e), c("x", "y")) } +#' @title Follower/Prev Identical +#' +#' @description +#' Tests whether the previous/following element in a vector is identical. +#' +#' @param x vector +#' +#' @return `logical` +#' @noRd +#' @examples +#' x <- c(1, 1, NA, 3, 4, 4, 5, 6, 6, 6, NA, 7, 8, 8) +#' .isFollowerIdentical(x) +#' .isPrecursorIdentical(x) +.isFollowerIdentical <- function(x) { + x <- x[-1L] == x[-length(x)] + c(x & !is.na(x), FALSE) +} +.isPrecursorIdentical <- function(x) { + x <- x[-1L] == x[-length(x)] + c(FALSE, x & !is.na(x)) +} + #' @title (Re)order edges #' #' @description diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index e0622b77..41b86dd3 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -53,6 +53,18 @@ test_that(".edgeList", { expect_equal(.edgeList(x, y, tolerance = 0.2, ppm = 0), e) }) +test_that(".is{Precursor,Follower}Identical", { + x <- c(1, 1, NA, 3, 4, 4, 5, 6, 6, 6, NA, 7, 8, 8) + expect_equal(.isFollowerIdentical(x), c( + TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, + TRUE, FALSE, FALSE, FALSE, TRUE, FALSE) + ) + expect_equal(.isPrecursorIdentical(x), c( + FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, + TRUE, TRUE, FALSE, FALSE, FALSE, TRUE) + ) +}) + test_that(".orderEdges", { x <- c(100.1, 100.2, 300, 500) y <- c(100, 200, 300.1) From b40dcd334c966ea54dcdec5c7220352443cb3b54 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Thu, 24 Oct 2019 23:13:06 +0200 Subject: [PATCH 16/32] feat: add .edgeGroupFrom --- R/joinGraph.R | 30 ++++++++++++++++++++++++++++++ tests/testthat/test_joinGraph.R | 23 +++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/R/joinGraph.R b/R/joinGraph.R index c9a30fa0..918f896c 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -70,6 +70,36 @@ cumsum(c(TRUE, g | is.na(g))) } +#' @title Find Origin of Edge Group +#' +#' @description +#' Finds the index of the list (x or y) to which the group belongs to. +#' +#' @param e `list`, edge list +#' @param g `numeric` group vector +#' +#' @return `numeric`, `1` if `x` was lower than `y`, otherwise `2`. If `x` == +#' `y` the decision of the previous/next element is returned +#' +#' @seealso .edgeGroups +#' +#' @noRd +#' @examples +#' e <- list(x = c(1, 2, NA, 3, 4, 4, 5), y = c(1, 1, 2, 3, 3, 4, 4)) +#' .edgeGroupFrom (e$x, e$y) +.edgeGroupFrom <- function(e, g = .edgeGroups(e)) { + if (!is.list(e) || length(e[[1L]]) != length(e[[2L]])) + stop("'e' has to be a list with two elements of equal length.") + if (length(e[[1L]]) != length(g)) + stop("'g' has to be of the same length as the elements in 'e'.") + + 2L - ( + (!is.na(e[[1L]]) & is.na(e[[2L]])) | + (.isPrecursorIdentical(e[[1L]]) & .isPrecursorIdentical(g)) | + (.isFollowerIdentical(e[[1L]]) & .isFollowerIdentical(g)) + ) +} + #' @title Create Edge List Matrix #' #' @description diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 41b86dd3..61d1ca45 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -44,6 +44,29 @@ test_that(".edgeGroups", { expect_equal(.edgeGroups(e4), g) }) +test_that(".edgeGroupFrom", { + expect_error(.edgeGroupFrom(list(1:2, 1:3)), "length") + expect_error(.edgeGroupFrom(list(1:2, 1:2), 1:3), "length") + + e1 <- list(x = c(1, 2, NA, 3, 4, 4, 5), y = c(1, 1, 2, 3, 3, 4, 4)) + g1 <- c(1, 1, 2, 3, 3, 4, 4) + + expect_equal(.edgeGroupFrom(e1, g1), rep_len(2, length(e1$x))) + expect_equal(.edgeGroupFrom(e1[2:1], g1), rep_len(1, length(e1$x))) + + e2 <- list(x = c(1, 2, NA, 3, 4, 5, 5), y = c(1, 1, 2, 3, 3, 5, 6)) + g2 <- c(1, 1, 2, 3, 3, 4, 4) + expect_equal(.edgeGroupFrom(e2, g2), c(2, 2, 2, 2, 2, 1, 1)) + + e3 <- list(x = c(1, 2, NA, NA, 3, 4, 4), y = c(1, 1, 2, 3, NA, 4, 5)) + g3 <- c(1, 1, 2, 3, 4, 5, 5) + expect_equal(.edgeGroupFrom(e3, g3), c(2, 2, 2, 2, 1, 1, 1)) + + e4 <- list(x = c(1, 2, NA, NA, 3, 3, 3), y = c(1, 1, 2, 3, 4, 5, 6)) + g4 <- c(1, 1, 2, 3, 4, 4, 4) + expect_equal(.edgeGroupFrom(e4, g4), c(2, 2, 2, 2, 1, 1, 1)) +}) + test_that(".edgeList", { x <- c(100.1, 100.2, 300, 500) From ed28fc70878804884c671cd9981801414bd0c4a1 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Thu, 24 Oct 2019 23:18:16 +0200 Subject: [PATCH 17/32] tests: add additional unit test for joinGraph --- tests/testthat/test_joinGraph.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 61d1ca45..08fa940d 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -9,6 +9,9 @@ test_that("joinGraph", { ) l <- list(x = c(NA, 2, NA, 4), y = c(1, NA, NA, 4, NA)) expect_equal(joinGraph(x, y), l) + + l <- list(x = c(NA, 2, NA, 4), y = c(1, NA, NA, 4, NA)) + expect_equal(joinGraph(x, y, ppm = 20), l) }) test_that(".anyCrossing", { From 01cd68388318afa7a43bf862475e31b4b72f1d84 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 25 Oct 2019 10:32:37 +0200 Subject: [PATCH 18/32] feat: throw an error for too many combinations --- R/joinGraph.R | 4 ++++ tests/testthat/test_joinGraph.R | 3 +++ 2 files changed, 7 insertions(+) diff --git a/R/joinGraph.R b/R/joinGraph.R index 918f896c..629df8b5 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -31,6 +31,10 @@ r <- rle(x) ncs <- cumsum(c(0L, r$lengths)) ncmb <- prod(r$lengths) + + if (ncmb > 1e10) + stop("too many possible combinations.") + times <- 1L l <- vector(mode = "list", length = length(r)) diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 08fa940d..814b9db7 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -20,6 +20,8 @@ test_that(".anyCrossing", { }) test_that(".combinations", { + expect_error(.combinations(rep(1:100, each = 2)), "too many combinations") + g <- c(1, 2, 2, 2, 3, 3) l <- list( c(1, 2, 5), c(1, 3, 5), c(1, 4, 5), @@ -27,6 +29,7 @@ test_that(".combinations", { ) expect_equal(.combinations(g), l) + expect_equal(.combinations(g), l) }) test_that(".edgeGroups", { From 99f479d242839f410b2fc3a56ec9affe8d93fb2e Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 25 Oct 2019 10:33:02 +0200 Subject: [PATCH 19/32] tests: fix joinGraph unit tests --- tests/testthat/test_joinGraph.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 814b9db7..564ee337 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -7,10 +7,16 @@ test_that("joinGraph", { c(100.0, 200.0, 300.002, 300.025, 300.0255, 9, 1, 1, 9, 1), ncol = 2L, dimnames = list(c(), c("mz", "intensity")) ) - l <- list(x = c(NA, 2, NA, 4), y = c(1, NA, NA, 4, NA)) - expect_equal(joinGraph(x, y), l) + l <- list( + x = c(NA, 1, 2, NA, NA, 3, 4, NA, NA), + y = c(1, NA, NA, 2, 3, NA, NA, 4, 5) + ) + expect_equal(joinGraph(x, y, tolerance = 0, ppm = 0), l) - l <- list(x = c(NA, 2, NA, 4), y = c(1, NA, NA, 4, NA)) + l <- list( + x = c(1, 2, NA, NA, 3, 4, NA), + y = c(NA, 1, 2, 3, NA, 4, 5) + ) expect_equal(joinGraph(x, y, ppm = 20), l) }) From 94c1b3b8c6f0b4f5de963ca64cb4ca2815ad2b95 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 25 Oct 2019 10:33:56 +0200 Subject: [PATCH 20/32] feat: add joinGraph --- R/joinGraph.R | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/R/joinGraph.R b/R/joinGraph.R index 629df8b5..3fe1d4c4 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -1,3 +1,67 @@ +#' @rdname matching +#' +#' @param FUN `function`, similarity function that should be maximized. +#' @param \dots further arguments passed to `FUN`. +#' +#' @details +#' `joinGraph`: joins two `matrix` by mapping values in `x` with +#' values in `y` and *vice versa* if they are similar enough (provided the +#' `tolerance` and `ppm` specified). For multiple matches in `x` or `y` all +#' possible combinations are evaluated using the similarity function `FUN`. The +#' combination that yield the highest return value of `FUN` is used for the final +#' match. +#' +#' @return `joinGraph` returns a `list` with two columns, namely `x` and `y`, +#' representing the index of the values in `x` matching the corresponding value +#' in `y` (or `NA` if the value does not match). +#' +#' @export +#' @examples +#' +#' x <- matrix( +#' c(100.001, 100.002, 300.01, 300.02, 1, 9, 1, 9), +#' ncol = 2L, dimnames = list(c(), c("mz", "intensity")) +#' ) +#' y <- matrix( +#' c(100.0, 200.0, 300.002, 300.025, 300.0255, 9, 1, 1, 9, 1), +#' ncol = 2L, dimnames = list(c(), c("mz", "intensity")) +#' ) +#' joinGraph(x, y, ppm = 20) +joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { + validPeaksMatrix(x) + validPeaksMatrix(y) + FUN <- match.fun(FUN) + + e <- .edgeList(x[, 1L], y[, 1L], tolerance = tolerance, ppm = ppm) + e <- .orderEdges(x[, 1L], y[, 1L], e) + g <- .edgeGroups(e) + ei <- .edgeGroupFrom(e, g) + gi <- which(.isPrecursorIdentical(g) | .isFollowerIdentical(g)) + + if (!length(gi)) + return(e) + + cmb <- .combinations(g[gi]) + + namask <- edges <- cbind(e[[1L]], e[[2L]]) + namask[cbind(gi, ei[gi])] <- NA_real_ + + score <- vapply1d(cmb, function(i) { + ii <- namask + ii[gi[i],] <- edges[gi[i],] + xx <- x[ii[, 1L],] + yy <- y[ii[, 2L],] + if (.anyCrossing(xx[, 1L], yy[, 1L])) + 0 + else + FUN(xx, yy, ...) + }) + cmb <- cmb[[which.max(score)]] + + namask[gi[cmb],] <- edges[gi[cmb],] + list(x = namask[, 1L], y = namask[, 2L]) +} + #' @title Crossing Edges #' #' @description From d5ff969e80dd2f480d84308c7d3a9b2976df5002 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 25 Oct 2019 10:36:10 +0200 Subject: [PATCH 21/32] docs: add author information --- R/joinGraph.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/joinGraph.R b/R/joinGraph.R index 3fe1d4c4..0315d127 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -15,6 +15,7 @@ #' representing the index of the values in `x` matching the corresponding value #' in `y` (or `NA` if the value does not match). #' +#' @author Sebastian Gibb, Thomas Naake #' @export #' @examples #' @@ -72,6 +73,7 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { #' #' @return `logical`, `TRUE` if at least one crossing edge was found, otherwise #' `FALSE`. +#' @author Sebastian Gibb #' @noRd #' @examples #' .anyCrossing(x = 1:3, y = c(NA, 1:2)) @@ -88,6 +90,7 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { #' #' @param `x` `integer`, group numbers #' @return `list`, each element represents a possible combination +#' @author Sebastian Gibb #' @noRd #' @examples #' .combinations(c(1, 2, 2, 2, 3, 3)) @@ -120,6 +123,7 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { #' #' @param e `list` with edges #' @return `integer` group values +#' @author Sebastian Gibb #' @noRd #' @examples #' .edgeGroups(list(x = c(1, 2, NA, 3, 4, 4, 5), y = c(1, 1, 2, 3, 3, 4, 4))) @@ -149,6 +153,7 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { #' @return `numeric`, `1` if `x` was lower than `y`, otherwise `2`. If `x` == #' `y` the decision of the previous/next element is returned #' +#' @author Sebastian Gibb #' @seealso .edgeGroups #' #' @noRd @@ -186,6 +191,7 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { #' [igraph::graph_from_edge_list()`] the `list` has to be `rbind`ed, the #' indices in the `y` column have to be increased by `length(x)` and the `NA` #' values (no match) have to be removed manually. +#' @author Sebastian Gibb #' @noRd #' @examples #' x <- c(100.1, 100.2, 300, 500) @@ -210,6 +216,7 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { #' @param x vector #' #' @return `logical` +#' @author Sebastian Gibb #' @noRd #' @examples #' x <- c(1, 1, NA, 3, 4, 4, 5, 6, 6, 6, NA, 7, 8, 8) @@ -238,6 +245,7 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { #' representing the index of the values in `x` matching the corresponding value #' in `y` (or `NA` if the value do not match). #' +#' @author Sebastian Gibb #' @noRd #' @examples #' x <- c(100.1, 100.2, 300, 500) @@ -259,6 +267,7 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { #' #' @param x `list` #' @return `list` +#' @author Sebastian Gibb #' @noRd #' @examples #' .transposeList(list(a = 1:10, b = 11:20, c = 21:30)) From b5160872cc7e56da969c6e454a403470f368b7d4 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 25 Oct 2019 10:40:31 +0200 Subject: [PATCH 22/32] chore: move some joinGraph helper functions to utils.R --- R/joinGraph.R | 76 --------------------------------- R/utils.R | 76 +++++++++++++++++++++++++++++++++ tests/testthat/test_joinGraph.R | 33 -------------- tests/testthat/test_utils.R | 32 ++++++++++++++ 4 files changed, 108 insertions(+), 109 deletions(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index 0315d127..01629c56 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -82,38 +82,6 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { is.unsorted(x, na.rm = TRUE) || is.unsorted(y, na.rm = TRUE) } -#' @title Find all possible combinations -#' -#' @description -#' Similar to `expand.grid` but expects a `numeric` vector as input and returns -#' the indices. -#' -#' @param `x` `integer`, group numbers -#' @return `list`, each element represents a possible combination -#' @author Sebastian Gibb -#' @noRd -#' @examples -#' .combinations(c(1, 2, 2, 2, 3, 3)) -.combinations <- function(x) { - r <- rle(x) - ncs <- cumsum(c(0L, r$lengths)) - ncmb <- prod(r$lengths) - - if (ncmb > 1e10) - stop("too many possible combinations.") - - times <- 1L - l <- vector(mode = "list", length = length(r)) - - for (i in seq_along(r$lengths)) { - n <- r$lengths[i] - ncmb <- ncmb / n - l[[i]] <- rep.int(rep.int(ncs[i] + seq_len(n), rep.int(times, n)), ncmb) - times <- times * n - } - .transposeList(l) -} - #' @title Find Edge Groups #' #' @description @@ -208,29 +176,6 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { setNames(.transposeList(e), c("x", "y")) } -#' @title Follower/Prev Identical -#' -#' @description -#' Tests whether the previous/following element in a vector is identical. -#' -#' @param x vector -#' -#' @return `logical` -#' @author Sebastian Gibb -#' @noRd -#' @examples -#' x <- c(1, 1, NA, 3, 4, 4, 5, 6, 6, 6, NA, 7, 8, 8) -#' .isFollowerIdentical(x) -#' .isPrecursorIdentical(x) -.isFollowerIdentical <- function(x) { - x <- x[-1L] == x[-length(x)] - c(x & !is.na(x), FALSE) -} -.isPrecursorIdentical <- function(x) { - x <- x[-1L] == x[-length(x)] - c(FALSE, x & !is.na(x)) -} - #' @title (Re)order edges #' #' @description @@ -259,24 +204,3 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { o <- order(xe, method = "radix") list(x = e[[1L]][o], y = e[[2L]][o]) } - -#' @title Transpose List -#' -#' @description -#' Transpose a `n * m` `list` into an `m * n` one. -#' -#' @param x `list` -#' @return `list` -#' @author Sebastian Gibb -#' @noRd -#' @examples -#' .transposeList(list(a = 1:10, b = 11:20, c = 21:30)) -.transposeList <- function(x) { - n <- unique(lengths(x)) - - if (!is.list(x) || length(n) != 1L) - stop("'e' has to be a list with elements of equal length.") - l <- split(unlist(x, use.names = FALSE), seq_len(n)) - names(l) <- NULL - l -} diff --git a/R/utils.R b/R/utils.R index c4f1ae78..c98c322c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,79 @@ +#' @title Find all possible combinations +#' +#' @description +#' Similar to `expand.grid` but expects a `numeric` vector as input and returns +#' the indices. +#' +#' @param `x` `integer`, group numbers +#' @return `list`, each element represents a possible combination +#' @author Sebastian Gibb +#' @noRd +#' @examples +#' .combinations(c(1, 2, 2, 2, 3, 3)) +.combinations <- function(x) { + r <- rle(x) + ncs <- cumsum(c(0L, r$lengths)) + ncmb <- prod(r$lengths) + + if (ncmb > 1e10) + stop("too many possible combinations.") + + times <- 1L + l <- vector(mode = "list", length = length(r)) + + for (i in seq_along(r$lengths)) { + n <- r$lengths[i] + ncmb <- ncmb / n + l[[i]] <- rep.int(rep.int(ncs[i] + seq_len(n), rep.int(times, n)), ncmb) + times <- times * n + } + .transposeList(l) +} + +#' @title Follower/Prev Identical +#' +#' @description +#' Tests whether the previous/following element in a vector is identical. +#' +#' @param x vector +#' +#' @return `logical` +#' @author Sebastian Gibb +#' @noRd +#' @examples +#' x <- c(1, 1, NA, 3, 4, 4, 5, 6, 6, 6, NA, 7, 8, 8) +#' .isFollowerIdentical(x) +#' .isPrecursorIdentical(x) +.isFollowerIdentical <- function(x) { + x <- x[-1L] == x[-length(x)] + c(x & !is.na(x), FALSE) +} +.isPrecursorIdentical <- function(x) { + x <- x[-1L] == x[-length(x)] + c(FALSE, x & !is.na(x)) +} + +#' @title Transpose List +#' +#' @description +#' Transpose a `n * m` `list` into an `m * n` one. +#' +#' @param x `list` +#' @return `list` +#' @author Sebastian Gibb +#' @noRd +#' @examples +#' .transposeList(list(a = 1:10, b = 11:20, c = 21:30)) +.transposeList <- function(x) { + n <- unique(lengths(x)) + + if (!is.list(x) || length(n) != 1L) + stop("'e' has to be a list with elements of equal length.") + l <- split(unlist(x, use.names = FALSE), seq_len(n)) + names(l) <- NULL + l +} + #' @title Check for valid Window Size #' #' @param w `integer(1)`, window size diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 564ee337..4095b96e 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -25,19 +25,6 @@ test_that(".anyCrossing", { expect_true(.anyCrossing(x = 1:3, y = c(2, 1, NA))) }) -test_that(".combinations", { - expect_error(.combinations(rep(1:100, each = 2)), "too many combinations") - - g <- c(1, 2, 2, 2, 3, 3) - l <- list( - c(1, 2, 5), c(1, 3, 5), c(1, 4, 5), - c(1, 2, 6), c(1, 3, 6), c(1, 4, 6) - ) - - expect_equal(.combinations(g), l) - expect_equal(.combinations(g), l) -}) - test_that(".edgeGroups", { e1 <- list(x = c(1, 2, NA, 3, 4, 4, 5), y = c(1, 1, 2, 3, 3, 4, 4)) e2 <- list(x = e1$y, y = e1$x) @@ -88,18 +75,6 @@ test_that(".edgeList", { expect_equal(.edgeList(x, y, tolerance = 0.2, ppm = 0), e) }) -test_that(".is{Precursor,Follower}Identical", { - x <- c(1, 1, NA, 3, 4, 4, 5, 6, 6, 6, NA, 7, 8, 8) - expect_equal(.isFollowerIdentical(x), c( - TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, - TRUE, FALSE, FALSE, FALSE, TRUE, FALSE) - ) - expect_equal(.isPrecursorIdentical(x), c( - FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, - TRUE, TRUE, FALSE, FALSE, FALSE, TRUE) - ) -}) - test_that(".orderEdges", { x <- c(100.1, 100.2, 300, 500) y <- c(100, 200, 300.1) @@ -108,11 +83,3 @@ test_that(".orderEdges", { expect_equal(.orderEdges(x, y, e), o) }) - -test_that(".transposeList", { - l <- list(a = 1:10, b = 11:20, c = 21:30) - r <- mapply(c, 1:10, 11:20, 21:30, SIMPLIFY = FALSE) - - expect_error(.transposeList(list(a = 1:3, b = 1:10)), "length") - expect_equal(.transposeList(l), r) -}) diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index 8be417cd..dad6458e 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -1,3 +1,35 @@ +test_that(".combinations", { + expect_error(.combinations(rep(1:100, each = 2)), "too many combinations") + + g <- c(1, 2, 2, 2, 3, 3) + l <- list( + c(1, 2, 5), c(1, 3, 5), c(1, 4, 5), + c(1, 2, 6), c(1, 3, 6), c(1, 4, 6) + ) + + expect_equal(.combinations(g), l) +}) + +test_that(".is{Precursor,Follower}Identical", { + x <- c(1, 1, NA, 3, 4, 4, 5, 6, 6, 6, NA, 7, 8, 8) + expect_equal(.isFollowerIdentical(x), c( + TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, + TRUE, FALSE, FALSE, FALSE, TRUE, FALSE) + ) + expect_equal(.isPrecursorIdentical(x), c( + FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, + TRUE, TRUE, FALSE, FALSE, FALSE, TRUE) + ) +}) + +test_that(".transposeList", { + l <- list(a = 1:10, b = 11:20, c = 21:30) + r <- mapply(c, 1:10, 11:20, 21:30, SIMPLIFY = FALSE) + + expect_error(.transposeList(list(a = 1:3, b = 1:10)), "length") + expect_equal(.transposeList(l), r) +}) + test_that(".validateWindow", { expect_error(.validateWindow(3, 10L), "integer") expect_error(.validateWindow(3L:4L, 10L), "length") From 8c27b158e3bcee2757c4325aca09653c9b79521a Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 25 Oct 2019 10:47:22 +0200 Subject: [PATCH 23/32] tests: fix .combinations error message --- tests/testthat/test_utils.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index dad6458e..dee59630 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -1,5 +1,8 @@ test_that(".combinations", { - expect_error(.combinations(rep(1:100, each = 2)), "too many combinations") + expect_error( + .combinations(rep(1:100, each = 2)), + "too many possible combinations" + ) g <- c(1, 2, 2, 2, 3, 3) l <- list( From 94421ffaf8f032c73f3ef394591e3e6708aaaa3b Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 25 Oct 2019 10:48:10 +0200 Subject: [PATCH 24/32] docs: run roxygenise --- NAMESPACE | 1 + man/binning.Rd | 2 +- man/matching.Rd | 42 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 19166e4e..81849798 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ export(dotproduct) export(i2index) export(isPeaksMatrix) export(join) +export(joinGraph) export(localMaxima) export(noise) export(ppm) diff --git a/man/binning.Rd b/man/binning.Rd index 98fecb59..4d34a270 100644 --- a/man/binning.Rd +++ b/man/binning.Rd @@ -42,7 +42,7 @@ bin(ints, mz, size = 2) bin(ints, mz, size = 2, FUN = sum) } \seealso{ -Other grouping/matching functions: \code{\link{closest}} +Other grouping/matching functions: \code{\link{joinGraph}} } \author{ Johannes Rainer, Sebastian Gibb diff --git a/man/matching.Rd b/man/matching.Rd index 4e0c79b6..f8d223ab 100644 --- a/man/matching.Rd +++ b/man/matching.Rd @@ -1,11 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/matching.R -\name{closest} +% Please edit documentation in R/joinGraph.R, R/matching.R +\name{joinGraph} +\alias{joinGraph} \alias{closest} \alias{common} \alias{join} \title{Relaxed Value Matching} \usage{ +joinGraph(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) + closest(x, table, tolerance = Inf, ppm = 0, duplicates = c("keep", "closest", "remove"), nomatch = NA_integer_) @@ -18,8 +21,7 @@ join(x, y, tolerance = 0, ppm = 0, type = c("outer", "left", "right", \arguments{ \item{x}{\code{numeric}, the values to be matched.} -\item{table}{\code{numeric}, the values to be matched against. In contrast to -\code{\link[=match]{match()}} \code{table} has to be sorted in increasing order.} +\item{y}{\code{numeric}, the values to be joined. Should be sorted.} \item{tolerance}{\code{numeric}, accepted tolerance. Could be of length one or the same length as \code{table}.} @@ -27,18 +29,27 @@ the same length as \code{table}.} \item{ppm}{\code{numeric(1)} representing a relative, value-specific parts-per-million (PPM) tolerance that is added to \code{tolerance}.} +\item{FUN}{\code{function}, similarity function that should be maximized.} + +\item{\dots}{further arguments passed to \code{FUN}.} + +\item{table}{\code{numeric}, the values to be matched against. In contrast to +\code{\link[=match]{match()}} \code{table} has to be sorted in increasing order.} + \item{duplicates}{\code{character(1)}, how to handle duplicated matches.} \item{nomatch}{\code{numeric(1)}, if the difference between the value in \code{x} and \code{table} is larger than \code{tolerance} \code{nomatch} is returned.} -\item{y}{\code{numeric}, the values to be joined. Should be sorted.} - \item{type}{\code{character(1)}, defines how \code{x} and \code{y} should be joined. See details for \code{join}.} } \value{ +\code{joinGraph} returns a \code{list} with two columns, namely \code{x} and \code{y}, +representing the index of the values in \code{x} matching the corresponding value +in \code{y} (or \code{NA} if the value does not match). + \code{closest} returns an \code{integer} vector of the same length as \code{x} giving the closest position in \code{table} of the first match or \code{nomatch} if there is no match. @@ -57,6 +68,13 @@ just accept \code{numeric} arguments but have an additional \code{tolerance} argument that allows relaxed matching. } \details{ +\code{joinGraph}: joins two \code{matrix} by mapping values in \code{x} with +values in \code{y} and \emph{vice versa} if they are similar enough (provided the +\code{tolerance} and \code{ppm} specified). For multiple matches in \code{x} or \code{y} all +possible combinations are evaluated using the similarity function \code{FUN}. The +combination that yield the highest return value of \code{FUN} is used for the final +match. + For \code{closest}/\code{common} the \code{tolerance} argument could be set to \code{0} to get the same results as for \code{\link[=match]{match()}}/\code{\link{\%in\%}}. If it is set to \code{Inf} (default) the index of the closest values is returned without any restriction. @@ -97,6 +115,16 @@ to the behaviour of \code{match}). That means for multiple matches just the closest one is reported. } \examples{ + +x <- matrix( + c(100.001, 100.002, 300.01, 300.02, 1, 9, 1, 9), + ncol = 2L, dimnames = list(c(), c("mz", "intensity")) +) +y <- matrix( + c(100.0, 200.0, 300.002, 300.025, 300.0255, 9, 1, 1, 9, 1), + ncol = 2L, dimnames = list(c(), c("mz", "intensity")) +) +joinGraph(x, y, ppm = 20) ## Define two vectors to match x <- c(1, 3, 5) y <- 1:10 @@ -169,6 +197,8 @@ y[ji$y] Other grouping/matching functions: \code{\link{bin}} } \author{ +Sebastian Gibb, Thomas Naake + Sebastian Gibb } \concept{grouping/matching functions} From 3e557b4a98546a60523aa1d2858577919a5f45df Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 25 Oct 2019 10:48:28 +0200 Subject: [PATCH 25/32] fix: import stats::setNames --- NAMESPACE | 1 + R/joinGraph.R | 3 +++ 2 files changed, 4 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 81849798..32cdb1e3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,6 +35,7 @@ importFrom(methods,as) importFrom(methods,is) importFrom(stats,filter) importFrom(stats,mad) +importFrom(stats,setNames) importFrom(stats,supsmu) importMethodsFrom(S4Vectors,cbind) importMethodsFrom(S4Vectors,colnames) diff --git a/R/joinGraph.R b/R/joinGraph.R index 01629c56..1a0006a6 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -159,7 +159,10 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { #' [igraph::graph_from_edge_list()`] the `list` has to be `rbind`ed, the #' indices in the `y` column have to be increased by `length(x)` and the `NA` #' values (no match) have to be removed manually. +#' #' @author Sebastian Gibb +#' +#' @importFrom stats setNames #' @noRd #' @examples #' x <- c(100.1, 100.2, 300, 500) From 734d76d8599b64c9c9f6c2caf589e54207cb7cf7 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 25 Oct 2019 10:51:00 +0200 Subject: [PATCH 26/32] docs: add function name to authors field --- R/joinGraph.R | 2 +- man/matching.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index 1a0006a6..884b1995 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -15,7 +15,7 @@ #' representing the index of the values in `x` matching the corresponding value #' in `y` (or `NA` if the value does not match). #' -#' @author Sebastian Gibb, Thomas Naake +#' @author `joinGraph`: Sebastian Gibb, Thomas Naake #' @export #' @examples #' diff --git a/man/matching.Rd b/man/matching.Rd index f8d223ab..80ee8596 100644 --- a/man/matching.Rd +++ b/man/matching.Rd @@ -197,7 +197,7 @@ y[ji$y] Other grouping/matching functions: \code{\link{bin}} } \author{ -Sebastian Gibb, Thomas Naake +\code{joinGraph}: Sebastian Gibb, Thomas Naake Sebastian Gibb } From e0a2c06117f33a29b7c4f4b4615f1045a0698c9b Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Mon, 18 Nov 2019 10:20:06 +0100 Subject: [PATCH 27/32] fix: .combinations generated list with wrong length pointed out by @tnaake --- R/utils.R | 2 +- tests/testthat/test_utils.R | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index c98c322c..aa213007 100644 --- a/R/utils.R +++ b/R/utils.R @@ -19,7 +19,7 @@ stop("too many possible combinations.") times <- 1L - l <- vector(mode = "list", length = length(r)) + l <- vector(mode = "list", length = length(r$lengths)) for (i in seq_along(r$lengths)) { n <- r$lengths[i] diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index dee59630..1f5642f3 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -4,6 +4,8 @@ test_that(".combinations", { "too many possible combinations" ) + expect_equal(.combinations(c(1, 1, 1)), list(1, 2, 3)) + g <- c(1, 2, 2, 2, 3, 3) l <- list( c(1, 2, 5), c(1, 3, 5), c(1, 4, 5), From 5a50c3b1fb70610ebdc85fbcbc061d85b330d837 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Mon, 18 Nov 2019 11:01:43 +0100 Subject: [PATCH 28/32] tests: add unit test for .edgeGroups bug --- tests/testthat/test_joinGraph.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 4095b96e..985448d6 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -41,6 +41,11 @@ test_that(".edgeGroups", { e4 <- list(x = c(1, 2, NA, NA, 3, 3, 3), y = c(1, 1, 2, 3, 4, 5, 6)) g <- c(1, 1, 2, 3, 4, 4, 4) expect_equal(.edgeGroups(e4), g) + + e5 <- list(x = c(1, 2, 3, NA, NA, 4, 5, 5, 6, 7, 8), + y = c(1, 1, 1, 2, 3, NA, 4, 5, 6, 6, 6)) + g <- c(1, 1, 1, 2, 3, 4, 5, 5, 6, 6, 6) + expect_equal(.edgeGroups(e5), g) }) test_that(".edgeGroupFrom", { From 0a72dadd39d2680054745ece60d84b50229f7dfd Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Mon, 18 Nov 2019 14:32:05 +0100 Subject: [PATCH 29/32] fix: .edgeGroup, not vectorized anymore --- R/joinGraph.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index 884b1995..a6efccbc 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -100,14 +100,16 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { if (!is.list(e) || n[1L] != n[2L]) stop("'e' has to be a list with two elements of equal length.") - ## na.rm = FALSE is important here. Otherwise duplicated indices that arn't - ## groups could occur. - g <- pmin(e[[1L]], e[[2L]], na.rm = FALSE) + px <- .isPrecursorIdentical(e[[1L]]) + py <- .isPrecursorIdentical(e[[2L]]) - ## that's more or less the same as rle but ignores values. - ## So it takes less memory and is slightly faster - g <- g[-1L] != g[-n[1L]] - cumsum(c(TRUE, g | is.na(g))) + for (i in seq_along(px)) { + if (px[i] && py[i - 1L]) + px[i] <- FALSE + if (py[i] && px[i - 1L]) + py[i] <- FALSE + } + cumsum(!(px | py)) } #' @title Find Origin of Edge Group From e201cb13e53a571d10bb5cf5fa4341b820a8e7a1 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 6 Mar 2020 14:44:50 +0100 Subject: [PATCH 30/32] fix: treat equal indicies as crossing --- R/joinGraph.R | 3 ++- tests/testthat/test_joinGraph.R | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index a6efccbc..458b8d8e 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -79,7 +79,8 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { #' .anyCrossing(x = 1:3, y = c(NA, 1:2)) #' .anyCrossing(x = 1:3, y = c(2, 1, NA)) .anyCrossing <- function(x, y) { - is.unsorted(x, na.rm = TRUE) || is.unsorted(y, na.rm = TRUE) + is.unsorted(x, na.rm = TRUE, strictly = TRUE) || + is.unsorted(y, na.rm = TRUE, strictly = TRUE) } #' @title Find Edge Groups diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index 985448d6..e8fde297 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -23,6 +23,7 @@ test_that("joinGraph", { test_that(".anyCrossing", { expect_false(.anyCrossing(x = 1:3, y = c(NA, 1:2))) expect_true(.anyCrossing(x = 1:3, y = c(2, 1, NA))) + expect_true(.anyCrossing(x = 1:3, y = c(1, 1, NA))) }) test_that(".edgeGroups", { From 8495a44247b1250374124d05efaad7c41dfdfcc8 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 6 Mar 2020 14:48:29 +0100 Subject: [PATCH 31/32] fix: throw a warning if no valid match was found --- R/joinGraph.R | 7 ++++++- tests/testthat/test_joinGraph.R | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index 458b8d8e..6971d2ec 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -57,7 +57,12 @@ joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { else FUN(xx, yy, ...) }) - cmb <- cmb[[which.max(score)]] + iscore <- which.max(score) + + if (!score[iscore]) + warning("Could not find a matching with a score > 0.") + + cmb <- cmb[[iscore]] namask[gi[cmb],] <- edges[gi[cmb],] list(x = namask[, 1L], y = namask[, 2L]) diff --git a/tests/testthat/test_joinGraph.R b/tests/testthat/test_joinGraph.R index e8fde297..d4c008b3 100644 --- a/tests/testthat/test_joinGraph.R +++ b/tests/testthat/test_joinGraph.R @@ -18,6 +18,11 @@ test_that("joinGraph", { y = c(NA, 1, 2, 3, NA, 4, 5) ) expect_equal(joinGraph(x, y, ppm = 20), l) + + z <- matrix( + c(100, 100, 1, 1), ncol = 2L, dimnames = list(c(), c("mz", "intensity")) + ) + expect_warning(joinGraph(z, z), "score > 0") }) test_that(".anyCrossing", { From 096ff09e3b4d5e115379ece8fcbc151c01329ce9 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 6 Mar 2020 14:49:26 +0100 Subject: [PATCH 32/32] refactor: remove validPeaksMatrix call --- R/joinGraph.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/joinGraph.R b/R/joinGraph.R index 6971d2ec..d9794e52 100644 --- a/R/joinGraph.R +++ b/R/joinGraph.R @@ -29,8 +29,6 @@ #' ) #' joinGraph(x, y, ppm = 20) joinGraph <- function(x, y, tolerance = 0, ppm = 0, FUN = dotproduct, ...) { - validPeaksMatrix(x) - validPeaksMatrix(y) FUN <- match.fun(FUN) e <- .edgeList(x[, 1L], y[, 1L], tolerance = tolerance, ppm = ppm)