Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace magrittr pipes with base pipes #418

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions R/encoding.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
#' # A file with bad encoding included in the package
#' path <- system.file("html-ex", "bad-encoding.html", package = "rvest")
#' x <- read_html(path)
#' x %>% html_elements("p") %>% html_text()
#' x |> html_elements("p") |> html_text()
#'
#' html_encoding_guess(x)
#' # Two valid encodings, only one of which is correct
#' read_html(path, encoding = "ISO-8859-1") %>% html_elements("p") %>% html_text()
#' read_html(path, encoding = "ISO-8859-2") %>% html_elements("p") %>% html_text()
#' read_html(path, encoding = "ISO-8859-1") |> html_elements("p") |> html_text()
#' read_html(path, encoding = "ISO-8859-2") |> html_elements("p") |> html_text()
html_encoding_guess <- function(x) {
check_installed("stringi")

Expand Down
4 changes: 2 additions & 2 deletions R/form.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
#' html <- read_html("http://www.google.com")
#' search <- html_form(html)[[1]]
#'
#' search <- search %>% html_form_set(q = "My little pony", hl = "fr")
#' search <- search |> html_form_set(q = "My little pony", hl = "fr")
#'
#' # Or if you have a list of values, use !!!
#' vals <- list(q = "web scraping", hl = "en")
#' search <- search %>% html_form_set(!!!vals)
#' search <- search |> html_form_set(!!!vals)
#'
#' # To submit and get result:
#' \dontrun{
Expand Down
14 changes: 7 additions & 7 deletions R/html.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#' url <- "https://rvest.tidyverse.org/articles/starwars.html"
#' html <- read_html(url)
#'
#' html %>%
#' html_element("div") %>%
#' html_children() %>%
#' html |>
#' html_element("div") |>
#' html_children() |>
#' html_name()
#' @export
#' @importFrom xml2 xml_name
Expand All @@ -35,11 +35,11 @@ html_name <- function(x) {
#' <li><a href="https://c.com">b</a></li>
#' </ul>')
#'
#' html %>% html_elements("a") %>% html_attrs()
#' html |> html_elements("a") |> html_attrs()
#'
#' html %>% html_elements("a") %>% html_attr("href")
#' html %>% html_elements("li") %>% html_attr("class")
#' html %>% html_elements("li") %>% html_attr("class", default = "inactive")
#' html |> html_elements("a") |> html_attr("href")
#' html |> html_elements("li") |> html_attr("class")
#' html |> html_elements("li") |> html_attr("class", default = "inactive")
#' @export
#' @importFrom xml2 xml_attr
html_attr <- function(x, name, default = NA_character_) {
Expand Down
14 changes: 7 additions & 7 deletions R/live.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@
#' # When we retrieve the raw HTML for this site, it doesn't contain the
#' # data we're interested in:
#' static <- read_html("https://www.forbes.com/top-colleges/")
#' static %>% html_elements(".TopColleges2023_tableRow__BYOSU")
#' static |> html_elements(".TopColleges2023_tableRow__BYOSU")
#'
#' # Instead, we need to run the site in a real web browser, causing it to
#' # download a JSON file and then dynamically generate the html:
#'
#' sess <- read_html_live("https://www.forbes.com/top-colleges/")
#' sess$view()
#' rows <- sess %>% html_elements(".TopColleges2023_tableRow__BYOSU")
#' rows %>% html_element(".TopColleges2023_organizationName__J1lEV") %>% html_text()
#' rows %>% html_element(".grant-aid") %>% html_text()
#' rows <- sess |> html_elements(".TopColleges2023_tableRow__BYOSU")
#' rows |> html_element(".TopColleges2023_organizationName__J1lEV") |> html_text()
#' rows |> html_element(".grant-aid") |> html_text()
#' }
read_html_live <- function(url) {
check_installed(c("chromote", "R6"))
Expand Down Expand Up @@ -67,11 +67,11 @@ read_html_live <- function(url) {
#' sess <- read_html_live("https://www.bodybuilding.com/exercises/finder")
#' sess$view()
#'
#' sess %>% html_elements(".ExResult-row") %>% length()
#' sess |> html_elements(".ExResult-row") |> length()
#' sess$click(".ExLoadMore-btn")
#' sess %>% html_elements(".ExResult-row") %>% length()
#' sess |> html_elements(".ExResult-row") |> length()
#' sess$click(".ExLoadMore-btn")
#' sess %>% html_elements(".ExResult-row") %>% length()
#' sess |> html_elements(".ExResult-row") |> length()
#' }
LiveHTML <- R6::R6Class(
"LiveHTML",
Expand Down
12 changes: 6 additions & 6 deletions R/rvest-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,21 @@
#' # Then find elements that match a css selector or XPath expression
#' # using html_elements(). In this example, each <section> corresponds
#' # to a different film
#' films <- starwars %>% html_elements("section")
#' films <- starwars |> html_elements("section")
#' films
#'
#' # Then use html_element() to extract one element per film. Here
#' # the title is given by the text inside <h2>
#' title <- films %>%
#' html_element("h2") %>%
#' title <- films |>
#' html_element("h2") |>
#' html_text2()
#' title
#'
#' # Or use html_attr() to get data out of attributes. html_attr() always
#' # returns a string so we convert it to an integer using a readr function
#' episode <- films %>%
#' html_element("h2") %>%
#' html_attr("data-id") %>%
#' episode <- films |>
#' html_element("h2") |>
#' html_attr("data-id") |>
#' readr::parse_integer()
#' episode
xml2::read_html
Expand Down
18 changes: 9 additions & 9 deletions R/selectors.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
#' <p class='important'>This is an important paragraph</p>
#' ")
#'
#' html %>% html_element("h1")
#' html %>% html_elements("p")
#' html %>% html_elements(".important")
#' html %>% html_elements("#first")
#' html |> html_element("h1")
#' html |> html_elements("p")
#' html |> html_elements(".important")
#' html |> html_elements("#first")
#'
#' # html_element() vs html_elements() --------------------------------------
#' html <- minimal_html("
Expand All @@ -54,18 +54,18 @@
#' <li><b>R4-P17</b> is a <i>droid</i></li>
#' </ul>
#' ")
#' li <- html %>% html_elements("li")
#' li <- html |> html_elements("li")
#'
#' # When applied to a node set, html_elements() returns all matching elements
#' # beneath any of the inputs, flattening results into a new node set.
#' li %>% html_elements("i")
#' li |> html_elements("i")
#'
#' # When applied to a node set, html_element() always returns a vector the
#' # same length as the input, using a "missing" element where needed.
#' li %>% html_element("i")
#' li |> html_element("i")
#' # and html_text() and html_attr() will return NA
#' li %>% html_element("i") %>% html_text2()
#' li %>% html_element("span") %>% html_attr("class")
#' li |> html_element("i") |> html_text2()
#' li |> html_element("span") |> html_attr("class")
html_element <- function(x, css, xpath) {
UseMethod("html_element")
}
Expand Down
16 changes: 8 additions & 8 deletions R/session.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,19 @@
#' @export
#' @examples
#' s <- session("http://hadley.nz")
#' s %>%
#' session_jump_to("hadley-wickham.jpg") %>%
#' session_jump_to("/") %>%
#' s |>
#' session_jump_to("hadley-wickham.jpg") |>
#' session_jump_to("/") |>
#' session_history()
#'
#' s %>%
#' session_jump_to("hadley-wickham.jpg") %>%
#' session_back() %>%
#' s |>
#' session_jump_to("hadley-wickham.jpg") |>
#' session_back() |>
#' session_history()
#'
#' \donttest{
#' s %>%
#' session_follow_link(css = "p a") %>%
#' s |>
#' session_follow_link(css = "p a") |>
#' html_elements("p")
#' }
session <- function(url, ...) {
Expand Down
12 changes: 6 additions & 6 deletions R/table.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
#' <tr><td>4</td><td>y</td></tr>
#' <tr><td>10</td><td>z</td></tr>
#' </table>")
#' sample1 %>%
#' html_element("table") %>%
#' sample1 |>
#' html_element("table") |>
#' html_table()
#'
#' # Values in merged cells will be duplicated
Expand All @@ -41,8 +41,8 @@
#' <tr><td colspan='2'>4</td><td>5</td></tr>
#' <tr><td>6</td><td colspan='2'>7</td></tr>
#' </table>")
#' sample2 %>%
#' html_element("table") %>%
#' sample2 |>
#' html_element("table") |>
#' html_table()
#'
#' # If a row is missing cells, they'll be filled with NAs
Expand All @@ -52,8 +52,8 @@
#' <tr><td colspan='2'>3</td></tr>
#' <tr><td>4</td></tr>
#' </table>")
#' sample3 %>%
#' html_element("table") %>%
#' sample3 |>
#' html_element("table") |>
#' html_table()
html_table <- function(x,
header = NA,
Expand Down
8 changes: 4 additions & 4 deletions R/text.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@
#'
#' # html_text() returns the raw underlying text, which includes whitespace
#' # that would be ignored by a browser, and ignores the <br>
#' html %>% html_element("p") %>% html_text() %>% writeLines()
#' html |> html_element("p") |> html_text() |> writeLines()
#'
#' # html_text2() simulates what a browser would display. Non-significant
#' # whitespace is collapsed, and <br> is turned into a line break
#' html %>% html_element("p") %>% html_text2() %>% writeLines()
#' html |> html_element("p") |> html_text2() |> writeLines()
#'
#' # By default, html_text2() also converts non-breaking spaces to regular
#' # spaces:
#' html <- minimal_html("<p>x&nbsp;y</p>")
#' x1 <- html %>% html_element("p") %>% html_text()
#' x2 <- html %>% html_element("p") %>% html_text2()
#' x1 <- html |> html_element("p") |> html_text()
#' x2 <- html |> html_element("p") |> html_text2()
#'
#' # When printed, non-breaking spaces look exactly like regular spaces
#' x1
Expand Down
16 changes: 8 additions & 8 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,21 @@ starwars <- read_html("https://rvest.tidyverse.org/articles/starwars.html")
# Then find elements that match a css selector or XPath expression
# using html_elements(). In this example, each <section> corresponds
# to a different film
films <- starwars %>% html_elements("section")
films <- starwars |> html_elements("section")
films

# Then use html_element() to extract one element per film. Here
# the title is given by the text inside <h2>
title <- films %>%
html_element("h2") %>%
title <- films |>
html_element("h2") |>
html_text2()
title

# Or use html_attr() to get data out of attributes. html_attr() always
# returns a string so we convert it to an integer using a readr function
episode <- films %>%
html_element("h2") %>%
html_attr("data-id") %>%
episode <- films |>
html_element("h2") |>
html_attr("data-id") |>
readr::parse_integer()
episode
```
Expand All @@ -74,7 +74,7 @@ If the page contains tabular data you can convert it directly to a data frame wi
```{r}
html <- read_html("https://en.wikipedia.org/w/index.php?title=The_Lego_Movie&oldid=998422565")

html %>%
html_element(".tracklist") %>%
html |>
html_element(".tracklist") |>
html_table()
```
22 changes: 14 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ starwars <- read_html("https://rvest.tidyverse.org/articles/starwars.html")
# Then find elements that match a css selector or XPath expression
# using html_elements(). In this example, each <section> corresponds
# to a different film
films <- starwars %>% html_elements("section")
films <- starwars |> html_elements("section")
films
#> {xml_nodeset (7)}
#> [1] <section><h2 data-id="1">\nThe Phantom Menace\n</h2>\n<p>\nReleased: 1999 ...
Expand All @@ -57,23 +57,29 @@ films
#> [5] <section><h2 data-id="5">\nThe Empire Strikes Back\n</h2>\n<p>\nReleased: ...
#> [6] <section><h2 data-id="6">\nReturn of the Jedi\n</h2>\n<p>\nReleased: 1983 ...
#> [7] <section><h2 data-id="7">\nThe Force Awakens\n</h2>\n<p>\nReleased: 2015- ...
```

``` r

# Then use html_element() to extract one element per film. Here
# the title is given by the text inside <h2>
title <- films %>%
html_element("h2") %>%
title <- films |>
html_element("h2") |>
html_text2()
title
#> [1] "The Phantom Menace" "Attack of the Clones"
#> [3] "Revenge of the Sith" "A New Hope"
#> [5] "The Empire Strikes Back" "Return of the Jedi"
#> [7] "The Force Awakens"
```

``` r

# Or use html_attr() to get data out of attributes. html_attr() always
# returns a string so we convert it to an integer using a readr function
episode <- films %>%
html_element("h2") %>%
html_attr("data-id") %>%
episode <- films |>
html_element("h2") |>
html_attr("data-id") |>
readr::parse_integer()
episode
#> [1] 1 2 3 4 5 6 7
Expand All @@ -85,8 +91,8 @@ frame with `html_table()`:
``` r
html <- read_html("https://en.wikipedia.org/w/index.php?title=The_Lego_Movie&oldid=998422565")

html %>%
html_element(".tracklist") %>%
html |>
html_element(".tracklist") |>
html_table()
#> # A tibble: 29 × 4
#> No. Title `Performer(s)` Length
Expand Down
34 changes: 17 additions & 17 deletions demo/tripadvisor.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,32 @@ library(rvest)

url <- "http://www.tripadvisor.com/Hotel_Review-g37209-d1762915-Reviews-JW_Marriott_Indianapolis-Indianapolis_Indiana.html"

reviews <- url %>%
read_html() %>%
reviews <- url |>
read_html() |>
html_elements("#REVIEWS .innerBubble")

id <- reviews %>%
html_element(".quote a") %>%
id <- reviews |>
html_element(".quote a") |>
html_attr("id")

quote <- reviews %>%
html_element(".quote span") %>%
quote <- reviews |>
html_element(".quote span") |>
html_text()

rating <- reviews %>%
html_element(".rating .rating_s_fill") %>%
html_attr("alt") %>%
gsub(" of 5 stars", "", .) %>%
# Extract the star rating from each review's "alt" text and convert it to an
# integer by stripping the " of 5 stars" suffix.
#
# The native pipe has no `.` placeholder (that is magrittr-only): it always
# inserts the left-hand side as the first positional argument. Naming
# `pattern` and `replacement` leaves `x` as the first unmatched formal of
# gsub(), so the piped value lands there. This works on R >= 4.1 without
# needing the `_` placeholder (R >= 4.2).
rating <- reviews |>
  html_element(".rating .rating_s_fill") |>
  html_attr("alt") |>
  gsub(pattern = " of 5 stars", replacement = "") |>
  as.integer()

date <- reviews %>%
html_element(".rating .ratingDate") %>%
html_attr("title") %>%
strptime("%b %d, %Y") %>%
date <- reviews |>
html_element(".rating .ratingDate") |>
html_attr("title") |>
strptime("%b %d, %Y") |>
as.POSIXct()

review <- reviews %>%
html_element(".entry .partial_entry") %>%
review <- reviews |>
html_element(".entry .partial_entry") |>
html_text()

data.frame(id, quote, rating, date, review, stringsAsFactors = FALSE) %>% View()
data.frame(id, quote, rating, date, review, stringsAsFactors = FALSE) |> View()
Loading
Loading