tidyverse · luisDVA · Aug 21, 2024 · Aug 21, 2024
diff --git a/R/encoding.R b/R/encoding.R
@@ -11,12 +11,12 @@
 #' # A file with bad encoding included in the package
 #' path <- system.file("html-ex", "bad-encoding.html", package = "rvest")
 #' x <- read_html(path)
-#' x %>% html_elements("p") %>% html_text()
+#' x |> html_elements("p") |> html_text()
 #'
 #' html_encoding_guess(x)
 #' # Two valid encodings, only one of which is correct
-#' read_html(path, encoding = "ISO-8859-1") %>% html_elements("p") %>% html_text()
-#' read_html(path, encoding = "ISO-8859-2") %>% html_elements("p") %>% html_text()
+#' read_html(path, encoding = "ISO-8859-1") |> html_elements("p") |> html_text()
+#' read_html(path, encoding = "ISO-8859-2") |> html_elements("p") |> html_text()
 html_encoding_guess <- function(x) {
   check_installed("stringi")
 

diff --git a/R/form.R b/R/form.R
@@ -22,11 +22,11 @@
 #' html <- read_html("http://www.google.com")
 #' search <- html_form(html)[[1]]
 #'
-#' search <- search %>% html_form_set(q = "My little pony", hl = "fr")
+#' search <- search |> html_form_set(q = "My little pony", hl = "fr")
 #'
 #' # Or if you have a list of values, use !!!
 #' vals <- list(q = "web scraping", hl = "en")
-#' search <- search %>% html_form_set(!!!vals)
+#' search <- search |> html_form_set(!!!vals)
 #'
 #' # To submit and get result:
 #' \dontrun{

diff --git a/R/html.R b/R/html.R
@@ -8,9 +8,9 @@
 #' url <- "https://rvest.tidyverse.org/articles/starwars.html"
 #' html <- read_html(url)
 #'
-#' html %>%
-#'   html_element("div") %>%
-#'   html_children() %>%
+#' html |>
+#'   html_element("div") |>
+#'   html_children() |>
 #'   html_name()
 #' @export
 #' @importFrom xml2 xml_name
@@ -35,11 +35,11 @@ html_name <- function(x) {
 #'   <li><a href="https://c.com">b</a></li>
 #'   </ul>')
 #'
-#' html %>% html_elements("a") %>% html_attrs()
+#' html |> html_elements("a") |> html_attrs()
 #'
-#' html %>% html_elements("a") %>% html_attr("href")
-#' html %>% html_elements("li") %>% html_attr("class")
-#' html %>% html_elements("li") %>% html_attr("class", default = "inactive")
+#' html |> html_elements("a") |> html_attr("href")
+#' html |> html_elements("li") |> html_attr("class")
+#' html |> html_elements("li") |> html_attr("class", default = "inactive")
 #' @export
 #' @importFrom xml2 xml_attr
 html_attr <- function(x, name, default = NA_character_) {

diff --git a/R/live.R b/R/live.R
@@ -27,16 +27,16 @@
 #' # When we retrieve the raw HTML for this site, it doesn't contain the
 #' # data we're interested in:
 #' static <- read_html("https://www.forbes.com/top-colleges/")
-#' static %>% html_elements(".TopColleges2023_tableRow__BYOSU")
+#' static |> html_elements(".TopColleges2023_tableRow__BYOSU")
 #'
 #' # Instead, we need to run the site in a real web browser, causing it to
 #' # download a JSON file and then dynamically generate the html:
 #'
 #' sess <- read_html_live("https://www.forbes.com/top-colleges/")
 #' sess$view()
-#' rows <- sess %>% html_elements(".TopColleges2023_tableRow__BYOSU")
-#' rows %>% html_element(".TopColleges2023_organizationName__J1lEV") %>% html_text()
-#' rows %>% html_element(".grant-aid") %>% html_text()
+#' rows <- sess |> html_elements(".TopColleges2023_tableRow__BYOSU")
+#' rows |> html_element(".TopColleges2023_organizationName__J1lEV") |> html_text()
+#' rows |> html_element(".grant-aid") |> html_text()
 #' }
 read_html_live <- function(url) {
   check_installed(c("chromote", "R6"))
@@ -67,11 +67,11 @@ read_html_live <- function(url) {
 #' sess <- read_html_live("https://www.bodybuilding.com/exercises/finder")
 #' sess$view()
 #'
-#' sess %>% html_elements(".ExResult-row") %>% length()
+#' sess |> html_elements(".ExResult-row") |> length()
 #' sess$click(".ExLoadMore-btn")
-#' sess %>% html_elements(".ExResult-row") %>% length()
+#' sess |> html_elements(".ExResult-row") |> length()
 #' sess$click(".ExLoadMore-btn")
-#' sess %>% html_elements(".ExResult-row") %>% length()
+#' sess |> html_elements(".ExResult-row") |> length()
 #' }
 LiveHTML <- R6::R6Class(
   "LiveHTML",

diff --git a/R/rvest-package.R b/R/rvest-package.R
@@ -30,21 +30,21 @@
 #' # Then find elements that match a css selector or XPath expression
 #' # using html_elements(). In this example, each <section> corresponds
 #' # to a different film
-#' films <- starwars %>% html_elements("section")
+#' films <- starwars |> html_elements("section")
 #' films
 #'
 #' # Then use html_element() to extract one element per film. Here
 #' # we the title is given by the text inside <h2>
-#' title <- films %>%
-#'   html_element("h2") %>%
+#' title <- films |>
+#'   html_element("h2") |>
 #'   html_text2()
 #' title
 #'
 #' # Or use html_attr() to get data out of attributes. html_attr() always
 #' # returns a string so we convert it to an integer using a readr function
-#' episode <- films %>%
-#'   html_element("h2") %>%
-#'   html_attr("data-id") %>%
+#' episode <- films |>
+#'   html_element("h2") |>
+#'   html_attr("data-id") |>
 #'   readr::parse_integer()
 #' episode
 xml2::read_html

diff --git a/R/selectors.R b/R/selectors.R
@@ -40,10 +40,10 @@
 #'   <p class='important'>This is an important paragraph</p>
 #' ")
 #'
-#' html %>% html_element("h1")
-#' html %>% html_elements("p")
-#' html %>% html_elements(".important")
-#' html %>% html_elements("#first")
+#' html |> html_element("h1")
+#' html |> html_elements("p")
+#' html |> html_elements(".important")
+#' html |> html_elements("#first")
 #'
 #' # html_element() vs html_elements() --------------------------------------
 #' html <- minimal_html("
@@ -54,18 +54,18 @@
 #'     <li><b>R4-P17</b> is a <i>droid</i></li>
 #'   </ul>
 #' ")
-#' li <- html %>% html_elements("li")
+#' li <- html |> html_elements("li")
 #'
 #' # When applied to a node set, html_elements() returns all matching elements
 #' # beneath any of the inputs, flattening results into a new node set.
-#' li %>% html_elements("i")
+#' li |> html_elements("i")
 #'
 #' # When applied to a node set, html_element() always returns a vector the
 #' # same length as the input, using a "missing" element where needed.
-#' li %>% html_element("i")
+#' li |> html_element("i")
 #' # and html_text() and html_attr() will return NA
-#' li %>% html_element("i") %>% html_text2()
-#' li %>% html_element("span") %>% html_attr("class")
+#' li |> html_element("i") |> html_text2()
+#' li |> html_element("span") |> html_attr("class")
 html_element <- function(x, css, xpath) {
   UseMethod("html_element")
 }

diff --git a/R/session.R b/R/session.R
@@ -22,19 +22,19 @@
 #' @export
 #' @examples
 #' s <- session("http://hadley.nz")
-#' s %>%
-#'   session_jump_to("hadley-wickham.jpg") %>%
-#'   session_jump_to("/") %>%
+#' s |>
+#'   session_jump_to("hadley-wickham.jpg") |>
+#'   session_jump_to("/") |>
 #'   session_history()
 #'
-#' s %>%
-#'   session_jump_to("hadley-wickham.jpg") %>%
-#'   session_back() %>%
+#' s |>
+#'   session_jump_to("hadley-wickham.jpg") |>
+#'   session_back() |>
 #'   session_history()
 #'
 #' \donttest{
-#' s %>%
-#'   session_follow_link(css = "p a") %>%
+#' s |>
+#'   session_follow_link(css = "p a") |>
 #'   html_elements("p")
 #' }
 session <- function(url, ...) {

diff --git a/R/table.R b/R/table.R
@@ -30,8 +30,8 @@
 #'   <tr><td>4</td><td>y</td></tr>
 #'   <tr><td>10</td><td>z</td></tr>
 #' </table>")
-#' sample1 %>%
-#'   html_element("table") %>%
+#' sample1 |>
+#'   html_element("table") |>
 #'   html_table()
 #'
 #' # Values in merged cells will be duplicated
@@ -41,8 +41,8 @@
 #'   <tr><td colspan='2'>4</td><td>5</td></tr>
 #'   <tr><td>6</td><td colspan='2'>7</td></tr>
 #' </table>")
-#' sample2 %>%
-#'   html_element("table") %>%
+#' sample2 |>
+#'   html_element("table") |>
 #'   html_table()
 #'
 #' # If a row is missing cells, they'll be filled with NAs
@@ -52,8 +52,8 @@
 #'   <tr><td colspan='2'>3</td></tr>
 #'   <tr><td>4</td></tr>
 #' </table>")
-#' sample3 %>%
-#'   html_element("table") %>%
+#' sample3 |>
+#'   html_element("table") |>
 #'   html_table()
 html_table <- function(x,
                        header = NA,

diff --git a/R/text.R b/R/text.R
@@ -27,17 +27,17 @@
 #'
 #' # html_text() returns the raw underlying text, which includes whitespace
 #' # that would be ignored by a browser, and ignores the <br>
-#' html %>% html_element("p") %>% html_text() %>% writeLines()
+#' html |> html_element("p") |> html_text() |> writeLines()
 #'
 #' # html_text2() simulates what a browser would display. Non-significant
 #' # whitespace is collapsed, and <br> is turned into a line break
-#' html %>% html_element("p") %>% html_text2() %>% writeLines()
+#' html |> html_element("p") |> html_text2() |> writeLines()
 #'
 #' # By default, html_text2() also converts non-breaking spaces to regular
 #' # spaces:
 #' html <- minimal_html("<p>x&nbsp;y</p>")
-#' x1 <- html %>% html_element("p") %>% html_text()
-#' x2 <- html %>% html_element("p") %>% html_text2()
+#' x1 <- html |> html_element("p") |> html_text()
+#' x2 <- html |> html_element("p") |> html_text2()
 #'
 #' # When printed, non-breaking spaces look exactly like regular spaces
 #' x1

diff --git a/README.Rmd b/README.Rmd
@@ -50,21 +50,21 @@ starwars <- read_html("https://rvest.tidyverse.org/articles/starwars.html")
 # Then find elements that match a css selector or XPath expression
 # using html_elements(). In this example, each <section> corresponds
 # to a different film
-films <- starwars %>% html_elements("section")
+films <- starwars |> html_elements("section")
 films
 
 # Then use html_element() to extract one element per film. Here
 # we the title is given by the text inside <h2>
-title <- films %>% 
-  html_element("h2") %>% 
+title <- films |> 
+  html_element("h2") |> 
   html_text2()
 title
 
 # Or use html_attr() to get data out of attributes. html_attr() always
 # returns a string so we convert it to an integer using a readr function
-episode <- films %>% 
-  html_element("h2") %>% 
-  html_attr("data-id") %>% 
+episode <- films |> 
+  html_element("h2") |> 
+  html_attr("data-id") |> 
   readr::parse_integer()
 episode
 ```
@@ -74,7 +74,7 @@ If the page contains tabular data you can convert it directly to a data frame wi
 ```{r}
 html <- read_html("https://en.wikipedia.org/w/index.php?title=The_Lego_Movie&oldid=998422565")
 
-html %>% 
-  html_element(".tracklist") %>% 
+html |> 
+  html_element(".tracklist") |> 
   html_table()
 ```
diff --git a/README.md b/README.md
@@ -47,7 +47,7 @@ starwars <- read_html("https://rvest.tidyverse.org/articles/starwars.html")
 # Then find elements that match a css selector or XPath expression
 # using html_elements(). In this example, each <section> corresponds
 # to a different film
-films <- starwars %>% html_elements("section")
+films <- starwars |> html_elements("section")
 films
 #> {xml_nodeset (7)}
 #> [1] <section><h2 data-id="1">\nThe Phantom Menace\n</h2>\n<p>\nReleased: 1999 ...
@@ -57,23 +57,29 @@ films
 #> [5] <section><h2 data-id="5">\nThe Empire Strikes Back\n</h2>\n<p>\nReleased: ...
 #> [6] <section><h2 data-id="6">\nReturn of the Jedi\n</h2>\n<p>\nReleased: 1983 ...
 #> [7] <section><h2 data-id="7">\nThe Force Awakens\n</h2>\n<p>\nReleased: 2015- ...
+```
+
+``` r
 
 # Then use html_element() to extract one element per film. Here
 # we the title is given by the text inside <h2>
-title <- films %>% 
-  html_element("h2") %>% 
+title <- films |> 
+  html_element("h2") |> 
   html_text2()
 title
 #> [1] "The Phantom Menace"      "Attack of the Clones"   
 #> [3] "Revenge of the Sith"     "A New Hope"             
 #> [5] "The Empire Strikes Back" "Return of the Jedi"     
 #> [7] "The Force Awakens"
+```
+
+``` r
 
 # Or use html_attr() to get data out of attributes. html_attr() always
 # returns a string so we convert it to an integer using a readr function
-episode <- films %>% 
-  html_element("h2") %>% 
-  html_attr("data-id") %>% 
+episode <- films |> 
+  html_element("h2") |> 
+  html_attr("data-id") |> 
   readr::parse_integer()
 episode
 #> [1] 1 2 3 4 5 6 7
@@ -85,8 +91,8 @@ frame with `html_table()`:
 ``` r
 html <- read_html("https://en.wikipedia.org/w/index.php?title=The_Lego_Movie&oldid=998422565")
 
-html %>% 
-  html_element(".tracklist") %>% 
+html |> 
+  html_element(".tracklist") |> 
   html_table()
 #> # A tibble: 29 × 4
 #>    No.   Title                       `Performer(s)`                       Length

diff --git a/demo/tripadvisor.R b/demo/tripadvisor.R
@@ -5,32 +5,32 @@ library(rvest)
 
 url <- "http://www.tripadvisor.com/Hotel_Review-g37209-d1762915-Reviews-JW_Marriott_Indianapolis-Indianapolis_Indiana.html"
 
-reviews <- url %>%
-  read_html() %>%
+reviews <- url |>
+  read_html() |>
   html_elements("#REVIEWS .innerBubble")
 
-id <- reviews %>%
-  html_element(".quote a") %>%
+id <- reviews |>
+  html_element(".quote a") |>
   html_attr("id")
 
-quote <- reviews %>%
-  html_element(".quote span") %>%
+quote <- reviews |>
+  html_element(".quote span") |>
   html_text()
 
-rating <- reviews %>%
-  html_element(".rating .rating_s_fill") %>%
-  html_attr("alt") %>%
-  gsub(" of 5 stars", "", .) %>%
+rating <- reviews |>
+  html_element(".rating .rating_s_fill") |>
+  html_attr("alt") |>
+  gsub(" of 5 stars", "", x = _) |> # `_` is the native-pipe placeholder (R >= 4.2); `.` only works with %>%
   as.integer()
 
-date <- reviews %>%
-  html_element(".rating .ratingDate") %>%
-  html_attr("title") %>%
-  strptime("%b %d, %Y") %>%
+date <- reviews |>
+  html_element(".rating .ratingDate") |>
+  html_attr("title") |>
+  strptime("%b %d, %Y") |>
   as.POSIXct()
 
-review <- reviews %>%
-  html_element(".entry .partial_entry") %>%
+review <- reviews |>
+  html_element(".entry .partial_entry") |>
   html_text()
 
-data.frame(id, quote, rating, date, review, stringsAsFactors = FALSE) %>% View()
+data.frame(id, quote, rating, date, review, stringsAsFactors = FALSE) |> View()