Skip to content

Commit

Permalink
Fixed issue with dplyr and custom class dataframes (#37)
Browse files Browse the repository at this point in the history
* Removed custom classes from processing dataframes. Additional classes assigned to collect dataframes for S3 routing started causing dplyr and vctrs inheritence issues.
* Updated documentation
  • Loading branch information
bryn-g authored Jun 14, 2020
1 parent b6faf44 commit 25b1c99
Show file tree
Hide file tree
Showing 61 changed files with 338 additions and 235 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: vosonSML
Version: 0.29.11
Version: 0.29.12
Title: Collecting Social Media Data and Generating Networks for Analysis
Description: A suite of tools for collecting and constructing networks from social media data.
Provides easy-to-use functions for collecting data across popular platforms (Twitter, YouTube
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# vosonSML 0.29.12

## Bug Fixes
- Fixed an issue with custom classes assigned to dataframes causing an `vctrs` error when using `dplyr` functions. The classes are no longer needed post-method routing so they are simply removed.
- Replaced an instance of the deprecated `dplyr::funs` function that was generating a warning.

## Minor Changes
- Minor documentation updates.

# vosonSML 0.29.11

## Bug Fixes
Expand Down
38 changes: 27 additions & 11 deletions R/AddText.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,26 +35,29 @@ AddText <- function(net, data, ...) {

#' @noRd
#' @export
AddText.default <- function(net, data, ...) {
AddText.default <- function(net, ...) {
stop("Unknown network type passed to AddText.", call. = FALSE)
}

#' @noRd
#' @method AddText activity
#' @export
AddText.activity <- function(net, data, ...) {
AddText.activity <- function(net, ...) {
UseMethod("AddText.activity", net)
}

#' @noRd
#' @export
AddText.activity.default <- function(net, data, ...) {
AddText.activity.default <- function(net, ...) {
stop("Unknown social media type passed to AddText.", call. = FALSE)
}

#' @noRd
#' @export
AddText.activity.twitter <- function(net, data, ...) {
# data <- tibble::as_tibble(data)
class(data) <- rmCustCls(class(data))

net$nodes <- dplyr::left_join(net$nodes, dplyr::select(data, .data$status_id, .data$text), by = "status_id")
net$nodes <- dplyr::left_join(net$nodes,
dplyr::select(data, .data$quoted_status_id, .data$quoted_text) %>%
Expand All @@ -70,7 +73,7 @@ AddText.activity.twitter <- function(net, data, ...) {

net$nodes$vosonTxt_tweet <- HTMLdecode(net$nodes$vosonTxt_tweet)

class(net) <- union(class(net), c("voson_text"))
class(net) <- union(class(net), c("vosontxt"))
cat("Done.\n")

net
Expand All @@ -79,12 +82,15 @@ AddText.activity.twitter <- function(net, data, ...) {
#' @noRd
#' @export
AddText.activity.youtube <- function(net, data, ...) {
# data <- tibble::as_tibble(data)
class(data) <- rmCustCls(class(data))

net$nodes <- dplyr::left_join(net$nodes,
dplyr::select(data, .data$CommentID, .data$Comment) %>%
dplyr::rename(id = .data$CommentID, vosonTxt_comment = .data$Comment),
by = c("id"))

class(net) <- union(class(net), c("voson_text"))
class(net) <- union(class(net), c("vosontxt"))
cat("Done.\n")

net
Expand All @@ -105,6 +111,9 @@ AddText.activity.youtube <- function(net, data, ...) {
#' @name vosonSML::AddText.activity.reddit
#' @export
AddText.activity.reddit <- function(net, data, cleanText = FALSE, ...) {
# data <- tibble::as_tibble(data)
class(data) <- rmCustCls(class(data))

net$nodes <- dplyr::left_join(net$nodes,
dplyr::mutate(data, id = paste0(.data$thread_id, ".", .data$structure)) %>%
dplyr::select(.data$id, .data$subreddit, .data$comment),
Expand All @@ -122,7 +131,7 @@ AddText.activity.reddit <- function(net, data, cleanText = FALSE, ...) {
net$nodes$title <- CleanRedditText(net$nodes$title)
}

class(net) <- union(class(net), c("voson_text"))
class(net) <- union(class(net), c("vosontxt"))
cat("Done.\n")

net
Expand All @@ -132,7 +141,6 @@ AddText.activity.reddit <- function(net, data, cleanText = FALSE, ...) {
#' @method AddText actor
#' @export
AddText.actor <- function(net, ...) {

UseMethod("AddText.actor", net)
}

Expand All @@ -145,14 +153,17 @@ AddText.actor.default <- function(net, ...) {
#' @noRd
#' @export
AddText.actor.twitter <- function(net, data, ...) {
# data <- tibble::as_tibble(data)
class(data) <- rmCustCls(class(data))

net$edges <- dplyr::left_join(net$edges,
dplyr::select(data, .data$status_id, .data$text),
by = c("status_id")) %>%
dplyr::rename(vosonTxt_tweet = .data$text)

net$edges$vosonTxt_tweet <- HTMLdecode(net$edges$vosonTxt_tweet)

class(net) <- union(class(net), c("voson_text"))
class(net) <- union(class(net), c("vosontxt"))
cat("Done.\n")

net
Expand Down Expand Up @@ -192,6 +203,9 @@ AddText.actor.twitter <- function(net, data, ...) {
#' @name vosonSML::AddText.actor.youtube
#' @export
AddText.actor.youtube <- function(net, data, repliesFromText = FALSE, atRepliesOnly = TRUE, ...) {
# data <- tibble::as_tibble(data)
class(data) <- rmCustCls(class(data))

net$edges %<>% dplyr::left_join(dplyr::select(data, .data$CommentID, .data$Comment) %>%
dplyr::rename(comment_id = .data$CommentID, vosonTxt_comment = .data$Comment),
by = c("comment_id"))
Expand Down Expand Up @@ -222,7 +236,7 @@ AddText.actor.youtube <- function(net, data, repliesFromText = FALSE, atRepliesO
# , at_id = NULL # leave in for reference
}

class(net) <- union(class(net), c("voson_text"))
class(net) <- union(class(net), c("vosontxt"))
cat("Done.\n")

net
Expand All @@ -243,7 +257,9 @@ AddText.actor.youtube <- function(net, data, repliesFromText = FALSE, atRepliesO
#' @name vosonSML::AddText.actor.reddit
#' @export
AddText.actor.reddit <- function(net, data, cleanText = FALSE, ...) {

# data <- tibble::as_tibble(data)
class(data) <- rmCustCls(class(data))

# rename the edge attribute containing the thread comment
net$edges <- dplyr::left_join(net$edges,
dplyr::select(data, .data$subreddit, .data$thread_id, .data$id, .data$comment),
Expand All @@ -264,7 +280,7 @@ AddText.actor.reddit <- function(net, data, cleanText = FALSE, ...) {
net$edges$title <- CleanRedditText(net$edges$title)
}

class(net) <- union(class(net), c("voson_text"))
class(net) <- union(class(net), c("vosontxt"))
cat("Done.\n")

net
Expand Down
25 changes: 9 additions & 16 deletions R/AddUserData.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#' @note Only supports twitter actor networks at this time. Refer to \code{\link{AddUserData.actor.twitter}}.
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param data A dataframe generated by \code{Collect}.
#' @param ... Additional parameters passed to function.
#'
#' @return Network as a named list of two dataframes containing \code{$nodes} and \code{$edges}
Expand All @@ -14,7 +15,7 @@
#' @aliases AddUserData
#' @name vosonSML::AddUserData
#' @export
AddUserData <- function(net, ...) {
AddUserData <- function(net, data, ...) {
# searches the class list of net for matching method
UseMethod("AddUserData", net)
}
Expand All @@ -29,7 +30,6 @@ AddUserData.default <- function(net, ...) {
#' @method AddUserData actor
#' @export
AddUserData.actor <- function(net, ...) {

UseMethod("AddUserData.actor", net)
}

Expand Down Expand Up @@ -81,6 +81,8 @@ AddUserData.actor.twitter <- function(net, data, lookupUsers = TRUE, twitterAuth
cat("Adding user profile data to network...")
if (verbose) { cat("\n") }

class(data) <- rmCustCls(class(data))

dfUsers <- net$nodes

dfUsers %<>% dplyr::mutate_all(as.character) # changes all col types to character
Expand Down Expand Up @@ -126,24 +128,15 @@ AddUserData.actor.twitter <- function(net, data, lookupUsers = TRUE, twitterAuth
}

df_users_info_all %<>% dplyr::rename("display_name" = .data$name, "name" = .data$user_id)

# fix type for numeric value columns and also replace na values with zero for convenience

# numeric value column names in rtweet collected data end with "count"
df_users_info_all %<>% dplyr::mutate_at(vars(ends_with("count")), funs(ifelse(is.na(.data$.), as.integer(0),
as.integer(.data$.))))
df_users_info_all %<>% dplyr::mutate_at(vars(ends_with("count")),
# funs(ifelse(is.na(.data$.), as.integer(0), as.integer(.data$.)))
list(function(x) ifelse(is.na(x), as.integer(0), as.integer(x))))

net$nodes <- df_users_info_all

# func_output <- list(
# "nodes" = df_users_info_all,
# "edges" = net$edges
# )
#
# class(func_output) <- append(class(func_output), c("network", "actor", "twitter"))

# return(func_output)

class(net) <- union(class(net), c("voson_user_data"))
class(net) <- union(class(net), c("vosonuser"))
cat("Done.\n")

net
Expand Down
2 changes: 1 addition & 1 deletion R/AddVideoData.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ AddVideoData.actor.youtube <- function(net, youtubeAuth = NULL, videoIds = NULL,
video_published_at = .data$VideoPublishedAt)
}

class(net) <- union(class(net), c("voson_video_data"))
class(net) <- union(class(net), c("vosonvideo"))
cat("Done.\n")

net
Expand Down
33 changes: 21 additions & 12 deletions R/Authenticate.twitter.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
#' @title Twitter API authentication
#'
#' @description Twitter authentication uses OAuth and either requires authorization of the rtweet package rstats2twitter
#' client app by a registered twitter user or twitter app developer API keys as described here:
#' @description Twitter authentication uses OAuth and typically requires four developer API keys
#' generated when you create a twitter app via the twitter developer web site.
#'
#' There is another method available commonly used by third-party apps in which an app can be
#' authorized by a user to use the twitter API on their behalf. The implementation
#' of this method in vosonSML does not require a developer account but does still require
#' the user to have access to a developers apps two consumer API keys. This allows multiple
#' users to access the twitter API with vosonSML via a single developer account and app.
#'
#' The twitter OAuth process is described here:
#' \url{https://developer.twitter.com/en/docs/basics/authentication/overview/oauth}.
#'
#' @param socialmedia Character string. Identifier for social media API to authenticate, set to \code{"twitter"}.
Expand All @@ -17,23 +25,24 @@
#'
#' @examples
#' \dontrun{
#' # twitter authentication via user authorization of app on their account
#' # will open a web browser to twitter prompting the user to log in and authorize the app
#' # apiKey and apiSecret are equivalent to a twitter apps consumer key and secret
#' twitterAuth <- Authenticate("twitter", appName = "An App",
#' apiKey = "xxxxxxxxxxxx", apiSecret = "xxxxxxxxxxxx"
#' )
#'
#' # twitter authentication with developer app api keys
#' # twitter authentication using developer app API keys
#' myDevKeys <- list(appName = "My App", apiKey = "xxxxxxxxxxxx",
#' apiSecret = "xxxxxxxxxxxx", accessToken = "xxxxxxxxxxxx",
#' accessTokenSecret = "xxxxxxxxxxxx")
#'
#' twitterAuth <- Authenticate("twitter", appName = myDevKeys$appName,
#' apiKey = myDevKeys$apiKey, apiSecret = myDevKeys$apiSecret, accessToken = myDevKeys$accessToken,
#' accessTokenSecret = myDevKeys$accessTokenSecret)
#' }
#'
#' # twitter authentication via authorization of an app to their user account
#' # requires the apps consumer API keys
#' # apiKey and apiSecret parameters are equivalent to the apps consumer key and secret
#' # will open a web browser to twitter prompting the user to log in and authorize the app
#' twitterAuth <- Authenticate("twitter", appName = "An App",
#' apiKey = "xxxxxxxxxxxx", apiSecret = "xxxxxxxxxxxx"
#' )
#' }
#'
#' @export
Authenticate.twitter <- function(socialmedia, appName, apiKey, apiSecret, accessToken, accessTokenSecret, ...) {

Expand Down Expand Up @@ -70,5 +79,5 @@ Authenticate.twitter <- function(socialmedia, appName, apiKey, apiSecret, access
access_secret = accessTokenSecret,
set_renv = FALSE)

return(credential)
credential
}
26 changes: 13 additions & 13 deletions R/Collect.reddit.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Collect.reddit <- function(credential, threadUrls, waitTime = c(3, 10), ua = get
}, error = function(e) {
stop(gsub("^Error:\\s", "", paste0(e)), call. = FALSE)
}, finally = {
threads_df <- as_tibble(threads_df)
threads_df <- tibble::as_tibble(threads_df)
})

if (!is.null(threads_df)) {
Expand Down Expand Up @@ -93,7 +93,7 @@ Collect.reddit <- function(credential, threadUrls, waitTime = c(3, 10), ua = get
class(threads_df) <- append(class(threads_df), c("datasource", "reddit"))
cat("Done.\n")

return(threads_df)
threads_df
}

reddit_build_df <- function(threadUrls, waitTime, ua, verbose) {
Expand All @@ -107,7 +107,7 @@ reddit_build_df <- function(threadUrls, waitTime, ua, verbose) {
# loop protection
prev_value <- NULL

extra_threads <- filter(branch_df, grepl("Listing:", .data$comm_id))
extra_threads <- dplyr::filter(branch_df, grepl("Listing:", .data$comm_id))
while (nrow(extra_threads) > 0) {

row_i <- 1 # top row
Expand All @@ -128,24 +128,24 @@ reddit_build_df <- function(threadUrls, waitTime, ua, verbose) {
cont_thread_id <- gsub("Listing:t1_", "", extra_threads[row_i, "comm_id"])

# set continue thread comment rm flag to true
branch_df <- mutate(branch_df,
rm = ifelse((.data$comm_id == cont_thread_id | .data$comm_id == extra_threads[row_i, "comm_id"]),
TRUE, .data$rm))
branch_df <- dplyr::mutate(branch_df,
rm = ifelse((.data$comm_id == cont_thread_id | .data$comm_id == extra_threads[row_i, "comm_id"]),
TRUE, .data$rm))

# get continue thread
cont_json <- reddit_data(paste0(x, cont_thread_id), waitTime, ua, cont = cont_thread_id, verbose = verbose)
cont_df <- reddit_content_plus(cont_json, x, depth = depth)

# if comments returned
if (nrow(cont_df)) {
cont_df <- cont_df %>% mutate(structure = paste0(struct, "_", .data$structure)) # append structure
cont_df <- cont_df %>% dplyr::mutate(structure = paste0(struct, "_", .data$structure)) # append structure

# insert new comments into thread dataframe using position
if (cont_index == 1) {
branch_df <- bind_rows(cont_df, branch_df)
branch_df <- dplyr::bind_rows(cont_df, branch_df)
} else {
pre_df <- bind_rows(branch_df[1:cont_index-1, ], cont_df)
branch_df <- bind_rows(pre_df, branch_df[cont_index:nrow(branch_df), ])
pre_df <- dplyr::bind_rows(branch_df[1:cont_index-1, ], cont_df)
branch_df <- dplyr::bind_rows(pre_df, branch_df[cont_index:nrow(branch_df), ])
}
}

Expand All @@ -156,14 +156,14 @@ reddit_build_df <- function(threadUrls, waitTime, ua, verbose) {
if (!is.null(branch_df) && nrow(branch_df) > 0) {
branch_df$thread_id <- gsub("^(.*)?/comments/([0-9A-Za-z]{6})?/.*?(/)?$", "\\2",
branch_df$url, ignore.case = TRUE, perl = TRUE, useBytes = TRUE) # extract thread id
branch_df <- branch_df %>% filter(.data$rm == FALSE) # remove continue thread entries
branch_df <- branch_df %>% arrange(.data$thread_id, .data$id) %>% mutate(id = 1:nrow(branch_df)) # re-index
branch_df <- branch_df %>% dplyr::filter(.data$rm == FALSE) # remove continue thread entries
branch_df <- branch_df %>% dplyr::arrange(.data$thread_id, .data$id) %>% dplyr::mutate(id = 1:nrow(branch_df)) # re-index
}

branch_df
})

threads_df <- bind_rows(threads)
threads_df <- dplyr::bind_rows(threads)
}

# based on method by @ivan-rivera RedditExtractoR
Expand Down
Loading

0 comments on commit 25b1c99

Please sign in to comment.