From d6a5e05cb613981c0db49b9b6c8242d0d3139e3e Mon Sep 17 00:00:00 2001
From: randef1ned <46381867+randef1ned@users.noreply.github.com>
Date: Tue, 17 Jun 2025 11:02:35 +0200
Subject: [PATCH 1/5] Fixed the duplicated names
---
DESCRIPTION | 2 +-
NAMESPACE | 2 ++
R/as_list.R | 85 +++++++++++++++++++++++++++++++++++++++++++++
R/as_xml_document.R | 35 ++++++++++++++++++-
R/xml_missing.R | 1 +
5 files changed, 123 insertions(+), 2 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index c44b42ae..3efad521 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -36,7 +36,7 @@ VignetteBuilder:
Config/Needs/website: tidyverse/tidytemplate
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
SystemRequirements: libxml2: libxml2-dev (deb), libxml2-devel (rpm)
Collate:
'S4.R'
diff --git a/NAMESPACE b/NAMESPACE
index 8f74da43..6b460acb 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -123,7 +123,9 @@ export("xml_attrs<-")
export("xml_name<-")
export("xml_text<-")
export(as_list)
+export(as_list2)
export(as_xml_document)
+export(deduplicate)
export(download_html)
export(download_xml)
export(html_structure)
diff --git a/R/as_list.R b/R/as_list.R
index be9474fc..179266c8 100644
--- a/R/as_list.R
+++ b/R/as_list.R
@@ -25,6 +25,7 @@
#' as_list(read_xml(" "))
#' as_list(read_xml(""))
#' as_list(read_xml(""))
+#' as_list(read_xml("ab"))
as_list <- function(x, ns = character(), ...) {
UseMethod("as_list")
}
@@ -106,3 +107,87 @@ r_attrs_to_xml <- function(x) {
names(x)[special] <- sub("^\\.", "", names(x)[special])
x
}
+
+#' Coerce xml nodes to a list with better handling duplicate elements
+#'
+#' This turns an XML document (or node or nodeset) into the equivalent R
+#' list. This functions like `as_list()` but ensures elements with duplicate
+#' names are put into indexed lists.
+#'
+#' @inheritParams xml_name
+#' @param ... Needed for compatibility with generic. Unused.
+#' @export
+#'
+#' @examples
+#' # With duplicate elements
+#' xml <- read_xml("ab")
+#' lst <- as_list(xml)
+#' lst$content$x # Returns "a" solely
+#' lst2 <- as_list2(xml)
+#' lst2$content$x # Returns "a" and "b"
+#' lst2$content$x[[1]] # Returns "a"
+#' lst2$content$x[[2]] # Returns "b"
+#'
+#' # With attributes preserved
+#' xml <- read_xml("34")
+#' as_list2(xml)
+as_list2 <- function(x, ns = character(), ...) {
+ result <- as_list(x, ns = ns, ...)
+
+ deduplicate(result)
+}
+
+
+#' Deduplicate named elements in a list
+#' @param lst A list potentially containing duplicate named elements
+#' @return A list with duplicate elements consolidated
+#' @export
+deduplicate <- function(lst) {
+ if (!is.list(lst) || length(lst) == 0 || is.null(names(lst)) || all(names(lst) == "")) {
+ return(lst)
+ }
+
+ attrs <- attributes(lst)
+
+ nms <- names(lst)
+
+ duplicated_names <- unique(nms[duplicated(nms[nms != ""])])
+
+ if (length(duplicated_names) == 0) {
+ # Recursively deal with duplications in the list
+ result <- lapply(lst, deduplicate)
+ attributes(result) <- attrs
+ return(result)
+ }
+
+ for (name in duplicated_names) {
+ deduplicated_index <- which(nms == name)
+
+ values <- lapply(deduplicated_index, function(i) {
+ if (is.list(lst[[i]]) && length(lst[[i]]) == 1 && is.character(lst[[i]][[1]])) {
+ return(lst[[i]][[1]])
+ } else {
+ # Fallback option
+ return(lst[[i]])
+ }
+ })
+
+ lst[[deduplicated_index[1]]] <- values
+
+ if (length(deduplicated_index) > 1) {
+ lst <- lst[-deduplicated_index[-1]]
+ nms <- nms[-deduplicated_index[-1]]
+ }
+ names(lst) <- nms
+ }
+
+ for (i in seq_along(lst)) {
+ if (is.list(lst[[i]]) && !(i %in% which(names(lst) %in% duplicated_names))) {
+ lst[[i]] <- deduplicate(lst[[i]])
+ }
+ }
+
+ attrs$names <- names(lst)
+ attributes(lst) <- attrs
+ return(lst)
+}
diff --git a/R/as_xml_document.R b/R/as_xml_document.R
index c24f1226..225834d8 100644
--- a/R/as_xml_document.R
+++ b/R/as_xml_document.R
@@ -56,7 +56,31 @@ as_xml_document.list <- function(x, ...) {
}
}
for (i in seq_along(x)) {
- add_node(x[[i]], parent, names(x)[[i]])
+ # Handle for duplicate-named elements
+ item <- x[[i]]
+ item_name <- names(x)[i]
+
+ if (is_contain_duplicated(item)) {
+ for (j in seq_along(item)) {
+ sub_item <- item[[j]]
+ new_node <- xml_add_child(parent, item_name)
+
+ if (is.character(sub_item) && length(sub_item) == 1) {
+ xml_text(new_node) <- sub_item
+ } else if (is.list(sub_item)) {
+ attr <- r_attrs_to_xml(attributes(sub_item))
+ for (k in seq_along(attr)) {
+ xml_set_attr(new_node, names(attr)[[k]], attr[[k]])
+ }
+
+ for (k in seq_along(sub_item)) {
+ add_node(sub_item[[k]], new_node, names(sub_item)[k])
+ }
+ }
+ }
+ } else {
+ add_node(item, parent, names(x)[[i]])
+ }
}
}
@@ -83,3 +107,12 @@ as_xml_document.xml_nodeset <- function(x, root, ...) {
as_xml_document.xml_document <- function(x, ...) {
x
}
+
+is_contain_duplicated <- function(lst) {
+ if (is.null(names(lst)) || all(names(lst) == "")) {
+ if (length(lst) > 1 && all(sapply(lst, function(x) is.list(x) || is.character(x)))) {
+ return(TRUE)
+ }
+ }
+ return(FALSE)
+}
diff --git a/R/xml_missing.R b/R/xml_missing.R
index 0600b270..5945731f 100644
--- a/R/xml_missing.R
+++ b/R/xml_missing.R
@@ -7,6 +7,7 @@ xml_missing <- function() {
out
}
+#' @export
format.xml_missing <- function(x, ...) {
""
}
From acba7ef37766e00b1955807b934d436390039a14 Mon Sep 17 00:00:00 2001
From: randef1ned <46381867+randef1ned@users.noreply.github.com>
Date: Tue, 17 Jun 2025 11:33:36 +0200
Subject: [PATCH 2/5] Fix issue #215
---
R/as_list.R | 9 ++++++++-
R/as_xml_document.R | 4 +++-
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/R/as_list.R b/R/as_list.R
index 179266c8..61b20350 100644
--- a/R/as_list.R
+++ b/R/as_list.R
@@ -134,7 +134,14 @@ r_attrs_to_xml <- function(x) {
as_list2 <- function(x, ns = character(), ...) {
result <- as_list(x, ns = ns, ...)
- deduplicate(result)
+ if (length(result) == 1 && length(unlist(result)) == 1) {
+ item <- unlist(result)
+ result <- list(unname(item))
+ names(result) <- names(item)
+ } else {
+ result <- deduplicate(result)
+ }
+ return(result)
}
diff --git a/R/as_xml_document.R b/R/as_xml_document.R
index 225834d8..a289b80f 100644
--- a/R/as_xml_document.R
+++ b/R/as_xml_document.R
@@ -42,7 +42,9 @@ as_xml_document.list <- function(x, ...) {
if (length(x) > 1) {
cli::cli_abort("Root nodes must be of length 1.")
}
-
+ if (length(x[[1]]) == 1 && is.vector(x[[1]])) {
+ x[[1]] <- list(x[[1]])
+ }
add_node <- function(x, parent, tag = NULL) {
if (is.atomic(x)) {
From a0544d7bfd7f53881e53bfec56de2f466be01623 Mon Sep 17 00:00:00 2001
From: randef1ned <46381867+randef1ned@users.noreply.github.com>
Date: Tue, 17 Jun 2025 11:38:09 +0200
Subject: [PATCH 3/5] Add test cases
---
tests/testthat/test-as_list.R | 8 ++++++++
tests/testthat/test-as_xml_document.R | 9 +++++++--
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/tests/testthat/test-as_list.R b/tests/testthat/test-as_list.R
index a85dc4b8..6f7c9335 100644
--- a/tests/testthat/test-as_list.R
+++ b/tests/testthat/test-as_list.R
@@ -1,4 +1,5 @@
list_xml <- function(x) as_list(read_xml(x))
+list_xml2 <- function(x) as_list2(read_xml(x))
test_that("empty elements become empty lists", {
expect_equal(list_xml(""), list(x = list()))
@@ -39,3 +40,10 @@ test_that("attributes in child nodes", {
list(w = structure(list(x = structure(list(y = list("3"), z = list("4")), a = "1", b = "2", .names = "esc")), aa = "0"))
)
})
+
+test_that("Duplicated items", {
+ expect_equal(
+ list_xml2("ab"),
+ list(content = list(x = list('a', 'b')))
+ )
+})
diff --git a/tests/testthat/test-as_xml_document.R b/tests/testthat/test-as_xml_document.R
index a902162a..1b53bb5c 100644
--- a/tests/testthat/test-as_xml_document.R
+++ b/tests/testthat/test-as_xml_document.R
@@ -1,6 +1,6 @@
-roundtrip_xml <- function(x) {
+roundtrip_xml <- function(x, as_list_version = 1) {
xml <- read_xml(x)
- lst <- as_list(xml)
+ lst <- if (as_list_version > 1) as_list2(xml) else as_list(xml)
xml2 <- as_xml_document(lst)
expect_equal(as.character(xml), as.character(xml2))
}
@@ -34,6 +34,11 @@ test_that("rountrips with special attributes", {
roundtrip_xml("")
})
+test_that("rountrips with only one element", {
+ roundtrip_xml("bar")
+ roundtrip_xml("bar", as_list_version = 2)
+})
+
test_that("more than one root node is an error", {
expect_error(as_xml_document(list(a = list(), b = list())), "Root nodes must be of length 1")
})
From 082e120ea26b9fa83b940ab13ccbe2486227725f Mon Sep 17 00:00:00 2001
From: randef1ned <46381867+randef1ned@users.noreply.github.com>
Date: Tue, 17 Jun 2025 12:00:17 +0200
Subject: [PATCH 4/5] Documentation for two functions
---
NAMESPACE | 1 +
man/as_list.Rd | 1 +
man/as_list2.Rd | 40 ++++++++++++++++++++++++++++++++++++++++
man/deduplicate.Rd | 17 +++++++++++++++++
4 files changed, 59 insertions(+)
create mode 100644 man/as_list2.Rd
create mode 100644 man/deduplicate.Rd
diff --git a/NAMESPACE b/NAMESPACE
index 6b460acb..8d6ded73 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -31,6 +31,7 @@ S3method(as_xml_document,response)
S3method(as_xml_document,xml_document)
S3method(as_xml_document,xml_node)
S3method(as_xml_document,xml_nodeset)
+S3method(format,xml_missing)
S3method(format,xml_node)
S3method(is.na,xml_missing)
S3method(is.na,xml_node)
diff --git a/man/as_list.Rd b/man/as_list.Rd
index ffb8d876..622c6730 100644
--- a/man/as_list.Rd
+++ b/man/as_list.Rd
@@ -43,4 +43,5 @@ as_list(read_xml(" a ]]>"))
as_list(read_xml(" "))
as_list(read_xml(""))
as_list(read_xml(""))
+as_list(read_xml("ab"))
}
diff --git a/man/as_list2.Rd b/man/as_list2.Rd
new file mode 100644
index 00000000..5a793da0
--- /dev/null
+++ b/man/as_list2.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/as_list.R
+\name{as_list2}
+\alias{as_list2}
+\title{Coerce xml nodes to a list with better handling duplicate elements}
+\usage{
+as_list2(x, ns = character(), ...)
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+
+\item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
+by \code{\link[=xml_ns]{xml_ns()}}. If provided, all names will be explicitly
+qualified with the ns prefix, i.e. if the element \code{bar} is defined
+in namespace \code{foo}, it will be called \code{foo:bar}. (And
+similarly for attributes). Default namespaces must be given an explicit
+name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and
+\code{\link[=xml_set_name]{xml_set_name()}}.}
+
+\item{...}{Needed for compatibility with generic. Unused.}
+}
+\description{
+This turns an XML document (or node or nodeset) into the equivalent R
+list. This functions like \code{as_list()} but ensures elements with duplicate
+names are put into indexed lists.
+}
+\examples{
+# With duplicate elements
+xml <- read_xml("ab")
+lst <- as_list(xml)
+lst$content$x # Returns "a" solely
+lst2 <- as_list2(xml)
+lst2$content$x # Returns "a" and "b"
+lst2$content$x[[1]] # Returns "a"
+lst2$content$x[[2]] # Returns "b"
+
+# With attributes preserved
+xml <- read_xml("34")
+as_list2(xml)
+}
diff --git a/man/deduplicate.Rd b/man/deduplicate.Rd
new file mode 100644
index 00000000..4c4a6863
--- /dev/null
+++ b/man/deduplicate.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/as_list.R
+\name{deduplicate}
+\alias{deduplicate}
+\title{Deduplicate named elements in a list}
+\usage{
+deduplicate(lst)
+}
+\arguments{
+\item{lst}{A list potentially containing duplicate named elements}
+}
+\value{
+A list with duplicate elements consolidated
+}
+\description{
+Deduplicate named elements in a list
+}
From 48061618819175700c5dbcad619a17d01f97b86a Mon Sep 17 00:00:00 2001
From: randef1ned <46381867+randef1ned@users.noreply.github.com>
Date: Tue, 17 Jun 2025 12:14:17 +0200
Subject: [PATCH 5/5] Add navigation in pkgdown configuration file
---
_pkgdown.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 01e37265..db0535dc 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -57,6 +57,7 @@ reference:
contents:
- ends_with("serialize")
- xml2_example
+ - deduplicate
news:
releases: