diff --git a/.gitignore b/.gitignore index 1ce9df9..3dec67a 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ vignettes/*.pdf # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 .httr-oauth +.twitter_oauth_token # knitr and R markdown default cache directories /*_cache/ diff --git a/vosonSML/DESCRIPTION b/vosonSML/DESCRIPTION index 62acbc5..8a52c8f 100644 --- a/vosonSML/DESCRIPTION +++ b/vosonSML/DESCRIPTION @@ -1,15 +1,14 @@ Package: vosonSML -Version: 0.24.0 +Version: 0.25.0 Title: Tools for Collecting Social Media Data and Generating Networks for Analysis Description: A suite of tools for collecting and constructing networks from social media data. Provides easy-to-use functions for collecting data across popular platforms (Instagram, Facebook, Twitter, YouTube and Reddit) and generating different types of networks for analysis. Type: Package -Imports: tm, stringr, twitteR, RCurl, bitops, rjson, plyr, igraph (>= 1.2.2), Rfacebook (>= 0.6.15), +Imports: tm, stringr, RCurl, bitops, rjson, plyr, igraph (>= 1.2.2), Rfacebook (>= 0.6.15), Hmisc, data.table, httpuv, instaR, methods, httr, RedditExtractoR (>= 2.1.2), magrittr, - dplyr (>= 0.7.8), rlang (>= 0.3.0.1) + dplyr (>= 0.7.8), rlang (>= 0.3.0.1), rtweet (>= 0.6.8) Depends: R (>= 3.2.0) -Suggests: testthat Encoding: UTF-8 Author: Timothy Graham, Robert Ackland, Chung-hong Chan, Bryan Gertzel Maintainer: Bryan Gertzel diff --git a/vosonSML/NAMESPACE b/vosonSML/NAMESPACE index 831488c..e17059c 100644 --- a/vosonSML/NAMESPACE +++ b/vosonSML/NAMESPACE @@ -1,28 +1,28 @@ # Generated by roxygen2: do not edit by hand +S3method(CreateActorNetwork,default) +S3method(CreateActorNetwork,reddit) S3method(CreateActorNetwork,twitter) +S3method(CreateActorNetwork,youtube) S3method(CreateBimodalNetwork,facebook) S3method(CreateBimodalNetwork,instagram) -S3method(CreateBimodalNetwork,twitter) S3method(CreateDynamicNetwork,facebook) S3method(CreateEgoNetworkFromData,instagram) -S3method(CreateSemanticNetwork,twitter) export(Authenticate) export(AuthenticateWithFacebookAPI) export(AuthenticateWithInstagramAPI) -export(AuthenticateWithTwitterAPI) export(Collect) export(CollectDataFacebook) export(CollectDataInstagram) -export(CollectDataTwitter) export(CollectEgoInstagram) export(Create) +export(CreateActorNetwork) export(CreateEgoNetwork) export(GetYoutubeVideoIDs) +export(GraphUserInfoTwitter) +export(ImportData) export(LoadCredential) -export(PopulateUserInfo) export(SaveCredential) -export(importData) import(RCurl) import(bitops) import(data.table) @@ -38,16 +38,23 @@ importFrom(Rfacebook,fbOAuth) importFrom(Rfacebook,getPage) importFrom(Rfacebook,getPost) importFrom(Rfacebook,getUsers) +importFrom(dplyr,anti_join) importFrom(dplyr,coalesce) +importFrom(dplyr,distinct) +importFrom(dplyr,ends_with) importFrom(dplyr,filter) +importFrom(dplyr,funs) importFrom(dplyr,group_by) importFrom(dplyr,left_join) importFrom(dplyr,mutate) +importFrom(dplyr,mutate_all) +importFrom(dplyr,mutate_at) importFrom(dplyr,rename) importFrom(dplyr,row_number) importFrom(dplyr,select) importFrom(dplyr,summarise) importFrom(dplyr,ungroup) +importFrom(dplyr,vars) importFrom(igraph,'V<-') importFrom(igraph,V) importFrom(igraph,delete.vertices) @@ -57,6 +64,7 @@ importFrom(igraph,graph_from_data_frame) importFrom(igraph,set.graph.attribute) importFrom(igraph,set_graph_attr) importFrom(igraph,simplify) +importFrom(igraph,vcount) importFrom(igraph,write.graph) importFrom(instaR,getComments) importFrom(instaR,getFollowers) @@ -65,17 +73,19 @@ 
importFrom(instaR,getLikes) importFrom(instaR,getUser) importFrom(instaR,instaOAuth) importFrom(instaR,searchInstagram) +importFrom(magrittr,'%<>%') importFrom(magrittr,'%>%') importFrom(plyr,ldply) importFrom(rlang,'.data') +importFrom(rtweet,create_token) +importFrom(rtweet,lookup_users) +importFrom(rtweet,rate_limit) +importFrom(rtweet,search_tweets) +importFrom(rtweet,users_data) importFrom(stats,'na.omit') importFrom(stringr,str_extract) importFrom(stringr,str_match_all) importFrom(stringr,str_replace_all) -importFrom(twitteR,lookupUsers) -importFrom(twitteR,searchTwitter) -importFrom(twitteR,setup_twitter_oauth) -importFrom(twitteR,twListToDF) importFrom(utils,"flush.console") importFrom(utils,"install.packages") importFrom(utils,"read.csv") diff --git a/vosonSML/R/Authenticate.R b/vosonSML/R/Authenticate.R index 6d48a35..9a06267 100644 --- a/vosonSML/R/Authenticate.R +++ b/vosonSML/R/Authenticate.R @@ -1,162 +1,91 @@ -## The AuthenticateWithTwitterAPI is not functional because it relies on a "side effect". It is a twitteR design problem. -## AuthenticateWithFacebookAPI can be fixed to make it functional. - -## TODO: Maybe need to unify the variable names, currently there are: -### facebook: appID, appSecret, extended_permissions, useCachedToken -### twitter: api_key, api_secret, access_token, access_token_secret, createToken <- inconsistent? -### youtube: apiKeyYoutube <- inconsistent? -### instagram: appID, appSecret, useCachedToken - -## Maybe make it consistent with only camel, as the rest of the package uses camel, not underscore. But hadleyverse packages usually use underscores: -## Therefore, unified variable names: -## appID, appSecret, apiKey, apiSecret, accessToken, accessTokenSecret, useCachedToken, extendedPermissions, createToken - -#' Create credential to access social media APIs +#' Create a credential to access social media APIs #' -#' \code{Authenticate} creates a \code{credential} object that enables R to -#' make authenticated calls to social media APIs. A \code{credential} object -#' is a S3 object with the authentication-related information such as access -#' tokens and the information on the social media that grant authentication. -#' \code{Authenticate} is the first step of the \code{Authenticate}, -#' \code{Collect}, \code{Create} workflow. +#' \code{Authenticate} creates a \code{credential} object that enables R to make authenticated calls to social media +#' APIs. A \code{credential} object is a S3 object with the authentication-related information such as access tokens +#' and the information on the social media that grant authentication. \code{Authenticate} is the first step of the +#' \code{Authenticate}, \code{\link{Collect}} and \code{\link{Create}} workflow. #' -#' @param socialmedia character string, social media API to authenticate, -#' currently supports "facebook", "youtube", "twitter", "instagram" and "reddit" -#' @param ... additional parameters for authentication -#' \code{facebook}: appID, appSecret -#' \code{youtube}: apiKey -#' \code{twitter}: apiKey, apiSecret, accessToken, accessTokenSecret -#' \code{instagram}: appID, appSecret -#' \code{reddit}: appName, appKey, appSecret, useTokenCache +#' @param socialmedia Character string. Identifier for social media API to authenticate.\cr +#' Supports: \code{"twitter"}, \code{"youtube"}, \code{"reddit"}, \code{"instagram"} and \code{"facebook"}. +#' @param ... Additional parameters for authentication appropriate to \code{socialmedia} identifier. 
+#' \describe{ +#' \item{twitter:}{\code{[appName], apiKey, apiSecret, accessToken, +#' accessTokenSecret, [useCachedToken]}} +#' \item{youtube:}{\code{apiKey}} +#' \item{reddit:}{\code{[appName], appKey, appSecret, [useCachedToken]}} +#' \item{instagram:}{\code{appID, appSecret, [useCachedToken]}} +#' \item{facebook:}{\code{appID, appSecret, [extendedPermissions, useCachedToken]}} +#' } #' -#' @return credential object with authentication information +#' @return A \code{credential} object with authentication information. #' -#' @note Currently, \code{Authenticate} with socialmedia = "twitter" generates -#' oauth information to be used in the current active session only (i.e. -#' "side-effect") and no authentication-related information will be stored in -#' the returned \code{credential} object. +#' @note Currently, \code{Authenticate} with \code{socialmedia = "twitter"} generates OAuth information to be used in +#' the current active session only (i.e. "side-effect") and no authentication-related information will be stored in the +#' returned \code{credential} object. #' -#' @author Chung-hong Chan -#' @seealso \code{\link{AuthenticateWithFacebookAPI}}, -#' \code{\link{AuthenticateWithInstagramAPI}}, -#' \code{\link{AuthenticateWithTwitterAPI}}, -#' \code{\link{SaveCredential}}, -#' \code{\link{LoadCredential}} -#' @examples +#' For other social network API's it's useful to cache the credential to a file and then re-use it in future sessions. +#' Refer to \code{\link{SaveCredential}} and \code{\link{LoadCredential}} to do this. +#' +#' @seealso \code{\link{SaveCredential}}, \code{\link{Collect}}, \code{\link{Create}} +#' @keywords authenticate credential twitter youtube reddit instagram facebook #' +#' @examples #' \dontrun{ #' require(magrittr) -#' ## Instagram ego network example -#' myAppID <- "123456789098765" -#' myAppSecret <- "abc123abc123abc123abc123abc123ab" -#' myUsernames <- c("senjohnmccain","obama") #' -#' Authenticate("instagram", -#' appID = myAappId, -#' appSecret = myAppSecret) %>% Collect(ego = TRUE, -#' username = myUsernames) %>% Create +#' ## youtube actor network example +#' +#' myYoutubeAPIKey <- "xxxxxxxxxxxxxxxxxxxxxx" +#' listYoutubeVideoIDs <- c("W2GZFeYGU3s", "mL27TAJGlWc") +#' +#' myActorNetwork <- Authenticate("youtube", apiKey = myYoutubeAPIKey) %>% +#' Collect(videoIDs = listYoutubeVideoIDs) %>% Create("actor") #' -#' ## YouTube actor network example -#' my_apiKeyYoutube <- "314159265358979qwerty" -#' videoIDs <- c("W2GZFeYGU3s","mL27TAJGlWc") +#' ## instagram ego network example #' -#' Authenticate("youtube", -#' apiKey = my_apiKeyYoutube) %>% Collect(videoIDs = videoIDs) %>% Create('actor') +#' myInstaAppID <- "xxxxxxxxxxx" +#' myInstaAppSecret <- "xxxxxxxxxxxxxxxxxxxxxx" +#' listInstaUsernames <- c("senjohnmccain", "obama") +#' +#' myEgoNetwork <- Authenticate("instagram", appID = myInstaAppID, appSecret = myInstaAppSecret) %>% +#' Collect(ego = TRUE, username = listInstaUsernames) %>% Create("ego") #' } +#' #' @export Authenticate <- function(socialmedia, ...) { authenticator <- switch(tolower(socialmedia), - facebook = facebookAuthenticator, - youtube = youtubeAuthenticator, twitter = twitterAuthenticator, - instagram = instagramAuthenticator, + youtube = youtubeAuthenticator, reddit = redditAuthenticator, - stop("Unknown socialmedia") - ) + instagram = instagramAuthenticator, + facebook = facebookAuthenticator, + stop("Unknown socialmedia")) + auth <- authenticator(...) 
+ credential <- list(socialmedia = tolower(socialmedia), auth = auth) class(credential) <- append(class(credential), "credential") + return(credential) } -### For the side effect of saving the credential into a file. -### Useful to cache the Credential to a file and then re-use it in the future session. -### i.e. Authenticate %>% SaveCredential %>% Collect -### and then, LoadCredential %>% Collect - -#' Save and load credential information -#' -#' Functions to save and load credential information. Currently, credential -#' information will be stored as a RDS file. \code{SaveCredential} will return -#' the input \code{credential}, useful for working as a filter between the -#' \code{Authenticate} and \code{Collect}. -#' -#' @aliases LoadCredential SaveCredential -#' @param credential \code{credential} object -#' @param filename character, filename to be saved to or restored from -#' @return \code{credential} object -#' @note \code{credential} created from \code{Authenticate} with socialmedia = -#' 'twitter' will not be saved by SaveCredential -#' @examples -#' -#' \dontrun{ -#' require(magrittr) -#' myAppID <- "123456789098765" -#' myAppSecret <- "abc123abc123abc123abc123abc123ab" -#' myUsernames <- c("senjohnmccain","obama") -#' -#' Authenticate("instagram", -#' appID = myAppId, -#' appSecret = myAppSecret) %>% SaveCredential("instagramCred.RDS") %>% Collect(ego = TRUE, -#' username = myUsernames) %>% Create -#' -#' ## Load the previously saved credential information -#' LoadCredential("instagramCred.RDS") %>% Collect(tag="obama", -#' distance=5000, n=100) %>% Create("bimodal") -#' } -#' @export -SaveCredential <- function(credential, filename = "credential.RDS") { - if (credential$socialmedia == "twitter") { - warning("Credential created for Twitter will not be saved.") - } else { - saveRDS(credential, filename) - } - return(credential) -} - -#' @rdname SaveCredential -#' @export -LoadCredential <- function(filename = "credential.RDS") { - credential <- readRDS(filename) - return(credential) +twitterAuthenticator <- function(appName, apiKey, apiSecret, accessToken, accessTokenSecret, useCachedToken) { + return(AuthenticateWithTwitterAPI(appName, apiKey, apiSecret, accessToken, accessTokenSecret, useCachedToken)) } -### *Authenticator functions should not be exported. It is just a bunch of helper functions to bridge the AuthenticateWith* functions with Authenticate(), but with datasource as the first argument and always return an auth object - -### As a convention, function starts with lower case shouldn't be exported. - youtubeAuthenticator <- function(apiKey) { - return(authenticateWithYoutubeAPI(apiKey)) + return(AuthenticateWithYoutubeAPI(apiKey)) } -### Currently, this Authenticator will return nothing, only for its side effect -### SAD!!!!!!!!!!!!!!!!!! -### i.e. cannot use SaveCredential and LoadCredential! - -twitterAuthenticator <- function(apiKey, apiSecret, accessToken, accessTokenSecret, createToken) { - AuthenticateWithTwitterAPI(api_key = apiKey, api_secret = apiSecret, access_token = accessToken, access_token_secret = accessTokenSecret, createToken = createToken) # ah, only for its side effect, really bad design decision, twitteR! 
+redditAuthenticator <- function(appName, appKey, appSecret, useCachedToken) { + # return(AuthenticateWithRedditAPI(appName, appKey, appSecret, useCachedToken)) return(NULL) } -facebookAuthenticator <- function(appID, appSecret, extendedPermissions = FALSE) { - return(AuthenticateWithFacebookAPI(appID, appSecret, extended_permissions = extendedPermissions, useCachedToken = FALSE)) -} - instagramAuthenticator <- function(appID, appSecret) { return(AuthenticateWithInstagramAPI(appID, appSecret)) } -redditAuthenticator <- function(appName, appKey, appSecret, useTokenCache) { - # return(AuthenticateWithRedditAPI(appName, appKey, appSecret, useTokenCache)) - return(NULL) +facebookAuthenticator <- function(appID, appSecret, extendedPermissions = FALSE) { + return(AuthenticateWithFacebookAPI(appID, appSecret, extendedPermissions, useCachedToken = FALSE)) } diff --git a/vosonSML/R/AuthenticateWithRedditAPI.R b/vosonSML/R/AuthenticateWithRedditAPI.R index cbdfe71..c935d83 100644 --- a/vosonSML/R/AuthenticateWithRedditAPI.R +++ b/vosonSML/R/AuthenticateWithRedditAPI.R @@ -12,14 +12,14 @@ #' @param appName character string containing the reddit app name associated with the API key. #' @param appKey character string containing the app key. #' @param appSecret character string containing the app secret. -#' @param useTokenCache logical. Use cached authentication token if found. +#' @param useCachedToken logical. Use cached authentication token if found. #' #' @return a reddit authentication token #' -AuthenticateWithRedditAPI <- function(appName, appKey, appSecret, useTokenCache) { +AuthenticateWithRedditAPI <- function(appName, appKey, appSecret, useCachedToken) { if (missing(appName)) { - appName <- "reddit" + appName <- "vosonSML-reddit" } if (missing(appKey) | missing(appSecret)) { @@ -27,8 +27,8 @@ AuthenticateWithRedditAPI <- function(appName, appKey, appSecret, useTokenCache) return() } - if (missing(useTokenCache)) { - useTokenCache <- FALSE + if (missing(useCachedToken)) { + useCachedToken <- FALSE } # sets up oauth2 for reddit @@ -44,7 +44,7 @@ AuthenticateWithRedditAPI <- function(appName, appKey, appSecret, useTokenCache) scope = c("read"), use_basic_auth = TRUE, config_init = user_agent("httr oauth"), - cache = useTokenCache) + cache = useCachedToken) return(reddit_token) } diff --git a/vosonSML/R/AuthenticateWithTwitterAPI.R b/vosonSML/R/AuthenticateWithTwitterAPI.R index 2db9c27..622f9a5 100644 --- a/vosonSML/R/AuthenticateWithTwitterAPI.R +++ b/vosonSML/R/AuthenticateWithTwitterAPI.R @@ -1,89 +1,62 @@ -#' Note: this function is DEPRECATED and will be removed in a future release. -#' Please use the \code{Authenticate} function +#' Note: this function is DEPRECATED. Please use the \code{\link{Authenticate}} function. #' -#' Twitter API Authentication +#' Twitter API authentication #' -#' Oauth based authentication with the Twitter API +#' Oauth based authentication using the Twitter API. #' -#' In order to collect data from Twitter, the user must first authenticate with -#' Twitter's Application Programming Interface (API). +#' In order to collect data from Twitter, the user must first authenticate with Twitter's API. This requires setting up +#' an app on Twitter. A useful guide to creating an app can be found in the rtweet documentation: +#' https://rtweet.info/articles/auth.html#creating-a-twitter-app #' -#' This requires setting up an App on Twitter. 
An excellent guide to achieving -#' this can be found at: -#' http://thinktostart.com/twitter-authentification-with-r/ -#' -#' @param api_key character string specifying the 'API key' used for -#' authentication. -#' @param api_secret character string specifying the 'API secret' used for -#' authentication. -#' @param access_token character string specifying the 'access token' used for -#' authentication. -#' @param access_token_secret character string specifying the 'access token -#' secret' used for authentication. -#' @param createToken logical. !! NOT PROPERLY IMPLEMENTED YET. -#' @return This is called for its side effect. -#' @author Timothy Graham & Robert Ackland -#' -#' @seealso \code{AuthenticateWithFacebookAPI} and -#' \code{AuthenticateWithYouTubeAPI} for other ways to collect social media -#' data. -#' @keywords twitter social media SNA -#' @examples -#' -#' \dontrun{ -#' # Firstly specify your API credentials -#' my_api_key <- "1234567890qwerty" -#' my_api_secret <- "1234567890qwerty" -#' my_access_token <- "1234567890qwerty" -#' my_access_token_secret <- "1234567890qwerty" -#' -#' AuthenticateWithTwitterAPI(api_key=my_api_key, api_secret=my_api_secret, -#' access_token=my_access_token, access_token_secret=my_access_token_secret) -#' } -#' @export -AuthenticateWithTwitterAPI <- -function(api_key, api_secret, access_token, access_token_secret, createToken) { - - # EnsurePackage("tm") # we only load packages as required (i.e. if user authenticate with twitter, then we load packages for twitter data collection/analysis) - # EnsurePackage("stringr") - # EnsurePackage("twitteR") - # EnsurePackage("RCurl") - # EnsurePackage("bitops") - # EnsurePackage("rjson") - # EnsurePackage("plyr") - # EnsurePackage("igraph") - - if (missing(api_key) | missing(api_secret) | missing(access_token) | missing(access_token_secret)) { +#' @param appName Character string. Specifies the twitter registered app name associated with API keys. +#' @param apiKey Character string. Specifies the app 'API key' used for authentication. +#' @param apiSecret Character string. Specifies the app 'API secret'. +#' @param accessToken Character string. Specifies the app 'access token'. +#' @param accessTokenSecret Character string. Specifies the app 'access token secret'. +#' @param useCachedToken Logical. If \code{TRUE} uses cached API token if found otherwise creates one. +#' +#' @return twitter_oauth. Returns a twitter oauth token object. +#' +#' @seealso \code{\link{Authenticate}} +#' @keywords authenticate twitter +#' +AuthenticateWithTwitterAPI <- function(appName, apiKey, apiSecret, accessToken, accessTokenSecret, + useCachedToken) { + + if (missing(apiKey) | missing(apiSecret) | missing(accessToken) | missing(accessTokenSecret)) { cat("Error. One or more API credentials arguments are missing.\nPlease specify these. \n") - return() + return(NULL) } - - # We avoid the popup prompt about cached authentication, - # and instead include a `createToken` argument in the function, - # and directly set the options parameter for the "httr" package. 
- # (And default to no token if the argument is missing) - - origOptions <- options("httr_oauth_cache") # original options setting - - if (missing(createToken)) { - createToken <- FALSE # default to no token + + if (missing(appName)) { + appName <- "vosonSML-twitter" } - - if (createToken=="TRUE" | createToken=="true" | createToken=="T" | createToken==TRUE) { - createToken <- TRUE # handling user input - } - - if (createToken) { - options(httr_oauth_cache=T) + + twitter_oauth <- NULL + token_file_name <- ".twitter-oauth" + + if (useCachedToken) { + if (file.exists(token_file_name)) { + cat("\nCached twitter token was found (using cached token).\n") + twitter_oauth <- LoadCredential(token_file_name) + # todo: check loaded token is valid before returning + return(twitter_oauth) + } else { + cat("\nOAuth token not found. A token will be created and saved to working directory.\n") + } } - else { - options(httr_oauth_cache=F) + + twitter_oauth <- rtweet::create_token( + app = appName, + consumer_key = apiKey, + consumer_secret = apiSecret, + access_token = accessToken, + access_secret = accessTokenSecret, + set_renv = FALSE) + + if (useCachedToken) { + SaveCredential(twitter_oauth, filename = token_file_name) } - - setup_twitter_oauth(api_key,api_secret,access_token,access_token_secret) - - options(httr_oauth_cache=origOptions) # reset options back to the original setting - - return() - + + return(twitter_oauth) } diff --git a/vosonSML/R/AuthenticateWithYoutubeAPI.R b/vosonSML/R/AuthenticateWithYoutubeAPI.R index af818f1..ac4681e 100644 --- a/vosonSML/R/AuthenticateWithYoutubeAPI.R +++ b/vosonSML/R/AuthenticateWithYoutubeAPI.R @@ -5,14 +5,13 @@ #' In order to collect data from YouTube, the user must first authenticate with Google's Application Programming #' Interface (API). Users can obtain a Google Developer API key at: https://console.developers.google.com. #' -#' @param apiKeyYoutube character string specifying your Google Developer API key. +#' @param apiKey character string specifying your Google Developer API key. #' #' @return This is called for its side effect. #' #' @note In the future this function will enable users to save the API key in working directory, and the function will #' automatically look for a locally stored key whenever it is called without apiKeyYoutube argument. #' -#' @noRd -authenticateWithYoutubeAPI <- function(apiKeyYoutube) { - return(apiKeyYoutube) +AuthenticateWithYoutubeAPI <- function(apiKey) { + return(apiKey) } diff --git a/vosonSML/R/Collect.R b/vosonSML/R/Collect.R index ea13fa3..10148d3 100644 --- a/vosonSML/R/Collect.R +++ b/vosonSML/R/Collect.R @@ -1,102 +1,123 @@ #' Collect data from social media for generating networks #' -#' This function collects data from social media APIs, and structures the data -#' into a data frame of class \code{dataSource.*}, ready for creating networks -#' for further analysis. \code{Collect} is the second step of the -#' \code{Authenticate}, \code{Collect}, \code{Create} workflow. This function is -#' a convenient UI wrapper to the core CollectDataFrom* family of functions. -#' -#' -#' @param credential \code{credential} object generated from -#' \code{Authenticate} -#' @param ego logical, collecting ego network data. Currently only support -#' Instagram. -#' @param ... 
additional parameters for data collection (refer to -#' CollectDataFrom* and CollectEgo* functions) -#' -#' \code{facebook}: pageName, rangeFrom, rangeTo, verbose, n, writeToFile, dynamic -#' \code{youtube}: videoIDs, verbose, writeToFile, maxComments -#' \code{twitter}: searchTerm, numTweets, verbose, writeToFile, language -#' \code{instagram}: credential, tag, n, lat, lng, distance, folder, mindate, maxdate, verbose, sleep, writeToFile, -#' waitForRateLimit -#' \code{reddit}: threadUrls, waitTime, writeToFile -#' -#' \code{instagram} with \code{ego} = TRUE: username, userid, verbose, -#' degreeEgoNet, waitForRateLimit, getFollows -#' @return A data.frame object of class \code{dataSource.*} that can be used -#' with \code{Create}. -#' @author Chung-hong Chan -#' @seealso \code{CollectDataFacebook}, -#' \code{CollectDataInstagram}, -#' \code{CollectDataTwitter}, -#' \code{CollectEgoInstagram}, -#' \code{CollectDataReddit}, -#' @examples +#' This function collects data from social media APIs, and structures the data into a data frame of class +#' \code{dataSource.*}, ready for creating networks for further analysis. \code{Collect} is the second step of the +#' \code{Authenticate}, \code{Collect}, \code{Create} workflow. This function is a convenient UI wrapper to the core +#' CollectDataFrom* family of functions. +#' +#' @param credential A \code{credential} object generated from \code{Authenticate}. +#' @param ego Logical. If \code{TRUE}, collect ego network data. Currently only supports Instagram. +#' @param ... Additional parameters for data collection appropriate to the credential \code{socialmedia} type. +#' Refer to CollectDataFrom* and CollectEgo* functions for more details. +#' \describe{ +#' \item{twitter:}{\code{authToken, searchTerm, [searchType, numTweets, includeRetweets, retryOnRateLimit,}\cr +#' \code{writeToFile, verbose, ...]}} +#' \item{youtube:}{\code{videoIDs, apiKeyYoutube, [verbose, writeToFile, maxComments]}} +#' \item{reddit:}{\code{threadUrls, [waitTime, writeToFile]}} +#' \item{instagram:}{\code{tag, n, lat, lng, [distance, folder, mindate, maxdate, verbose, sleep,}\cr +#' \code{writeToFile, waitForRateLimit, credential]}} +#' \item{instagram with \code{ego = TRUE}:}{\code{username, userid, [verbose, degreeEgoNet,}\cr +#' \code{waitForRateLimit, getFollows, credential]}} +#' \item{facebook:}{\code{pageName, [rangeFrom, rangeTo, verbose, n, writeToFile, dynamic]}} +#' } +#' +#' @return A data.frame object of class \code{dataSource.*} that can be used with \code{Create}. 
#' +#' @seealso \code{Authenticate}, \code{Create} +#' @keywords collect twitter youtube reddit instagram facebook +#' +#' @examples #' \dontrun{ #' require(magrittr) -#' ## Instagram ego network example -#' myAppID <- "123456789098765" -#' myAppSecret <- "abc123abc123abc123abc123abc123ab" -#' myUsernames <- c("senjohnmccain","obama") -#' -#' Authenticate("instagram", -#' appID = myAappId, -#' appSecret = myAppSecret) %>% Collect(ego = TRUE, -#' username = myUsernames) %>% Create -#' -#' ## YouTube actor network example -#' my_apiKeyYoutube <- "314159265358979qwerty" -#' videoIDs <- c("W2GZFeYGU3s","mL27TAJGlWc") -#' -#' Authenticate("youtube", -#' apiKey = my_apiKeyYoutube) %>% Collect(videoIDs = videoIDs) %>% Create('actor') +#' +#' ## youtube actor network example +#' +#' myYoutubeAPIKey <- "xxxxxxxxxxxxxxxxxxxxxx" +#' listYoutubeVideoIDs <- c("W2GZFeYGU3s", "mL27TAJGlWc") +#' +#' myActorNetwork <- Authenticate("youtube", apiKey = myYoutubeAPIKey) %>% +#' Collect(videoIDs = listYoutubeVideoIDs) %>% Create("actor") +#' +#' ## instagram ego network example +#' +#' myInstaAppID <- "xxxxxxxxxxx" +#' myInstaAppSecret <- "xxxxxxxxxxxxxxxxxxxxxx" +#' listInstaUsernames <- c("senjohnmccain", "obama") +#' +#' myEgoNetwork <- Authenticate("instagram", appID = myInstaAppID, appSecret = myInstaAppSecret) %>% +#' Collect(ego = TRUE, username = listInstaUsernames) %>% Create("ego") +#' +#' ## facebook bimodal network example +#' +#' myFacebookAppID <- "xxxxxxxxxxx" +#' myFacebookAppSecret <- "xxxxxxxxxxxxxxxxxxxxxx" +#' +#' myBimodalNetwork <- Authenticate("Facebook", appID = myFacebookAppID, +#' appSecret = myFacebookAppSecret) %>% +#' SaveCredential("FBCredential.RDS") %>% +#' Collect(pageName = "StarWars", rangeFrom = "2015-03-01", rangeTo = "2015-03-02", +#' writeToFile = FALSE) %>% +#' Create("bimodal") +#' +#' ## facebook dynamic network example +#' +#' myDynamicNetwork <- LoadCredential("FBCredential.RDS") %>% +#' Collect(pageName = "StarWars", rangeFrom = "2015-03-01", rangeTo = "2015-03-02", +#' writeToFile = FALSE) %>% +#' Create("dynamic") #' } -#' +#' #' @export Collect <- function(credential, ego = FALSE, ...) { - if (ego) { - collector <- switch(credential$socialmedia, - instagram = instagramEgo, - stop("Unsupported socialmedia") - ) - } else { - collector <- switch(credential$socialmedia, - facebook = facebookCollector, - youtube = youtubeCollector, - twitter = twitterCollector, - instagram = instagramCollector, - reddit = redditCollector, - stop("Unsupported socialmedia") - ) - } - return(collector(credential, ...)) + if (ego) { + collector <- switch(credential$socialmedia, + instagram = instagramEgo, + stop("Unsupported socialmedia")) + } else { + collector <- switch(credential$socialmedia, + twitter = twitterCollector, + youtube = youtubeCollector, + reddit = redditCollector, + instagram = instagramCollector, + facebook = facebookCollector, + stop("Unsupported socialmedia")) + } + + return(collector(credential, ...)) } -### *collector functions should not be exported. It is just a bunch of helper functions to bridge the CollectDataFrom* functions with Collect(), but with credential obj as the first argument - -youtubeCollector <- - function(credential, videoIDs, verbose, writeToFile, maxComments) { - return(collectDataYoutube(videoIDs, apiKeyYoutube = credential$auth, verbose, writeToFile, maxComments)) +twitterCollector <- function(credential, ...) 
{ + return(CollectDataTwitter(authToken = credential$auth, ...)) } -facebookCollector <- - function(credential,pageName,rangeFrom,rangeTo,verbose,n,writeToFile) { - return(CollectDataFacebook(pageName,rangeFrom,rangeTo,verbose,n,writeToFile, credential)) +# twitterCollector <- function(credential, searchTerm, searchType, numTweets, includeRetweets, retryOnRateLimit, +# writeToFile, verbose, ...) { +# return(CollectDataTwitter(authToken = credential$auth, searchTerm, searchType, numTweets, includeRetweets, +# retryOnRateLimit, writeToFile, verbose, ...)) +# } + +youtubeCollector <- function(credential, ...) { + return(CollectDataYoutube(apiKey = credential$auth, ...)) } -twitterCollector <- function(credential, searchTerm, numTweets, verbose, writeToFile, language, ...) { - return(CollectDataTwitter(searchTerm, numTweets, verbose, writeToFile, language, ...)) # credential means nothing to twitteR +# youtubeCollector <- function(credential, videoIDs, verbose, writeToFile, maxComments) { +# return(CollectDataYoutube(videoIDs, apiKeyYoutube = credential$auth, verbose, writeToFile, maxComments)) +# } + +redditCollector <- function(credential, threadUrls, waitTime, writeToFile) { + return(CollectDataReddit(threadUrls, waitTime, writeToFile)) } -instagramCollector <- function(credential, tag, n, lat, lng, distance, folder, mindate, maxdate, verbose, sleep, writeToFile, waitForRateLimit) { - return(CollectDataInstagram(tag, n, lat, lng, distance, folder, mindate, maxdate, verbose, sleep, writeToFile, waitForRateLimit, credential)) +instagramCollector <- function(credential, tag, n, lat, lng, distance, folder, mindate, maxdate, verbose, sleep, + writeToFile, waitForRateLimit) { + return(CollectDataInstagram(tag, n, lat, lng, distance, folder, mindate, maxdate, verbose, sleep, writeToFile, + waitForRateLimit, credential)) } instagramEgo <- function(credential, username, userid, verbose, degreeEgoNet, waitForRateLimit, getFollows) { - return(CollectEgoInstagram(username, userid, verbose, degreeEgoNet, waitForRateLimit, getFollows, credential)) + return(CollectEgoInstagram(username, userid, verbose, degreeEgoNet, waitForRateLimit, getFollows, credential)) } -redditCollector <- function(credential, threadUrls, waitTime, writeToFile) { - return(CollectDataReddit(threadUrls, waitTime, writeToFile)) +facebookCollector <- function(credential, pageName, rangeFrom, rangeTo, verbose, n, writeToFile) { + return(CollectDataFacebook(pageName, rangeFrom, rangeTo, verbose, n, writeToFile, credential)) } diff --git a/vosonSML/R/CollectDataReddit.R b/vosonSML/R/CollectDataReddit.R index 40278e9..4f74a35 100644 --- a/vosonSML/R/CollectDataReddit.R +++ b/vosonSML/R/CollectDataReddit.R @@ -11,23 +11,13 @@ #' @return A data frame object of class dataSource.reddit that can be used for creating unimodal #' networks (CreateActorNetwork). #' -CollectDataReddit <- function(threadUrls, waitTime = 5, writeToFile) { +CollectDataReddit <- function(threadUrls, waitTime = 5, writeToFile = FALSE) { - if (missing(threadUrls)) { - cat("Error. Argument `threadUrls` is missing.\nPlease provide a reddit thread url.\n") - return(NA) + if (missing(threadUrls) || !is.vector(threadUrls) || length(threadUrls) < 1) { + stop("Please provide a vector of one or more reddit thread urls.\n", call. = FALSE) } - if (!is.vector(threadUrls) || length(threadUrls) < 1) { - cat("Error. 
Please provide a vector of one or more reddit thread urls.\n") - return(NA) - } - - if (missing(writeToFile)) { - writeToFile <- FALSE - } - - cat("\nCollecting thread data for reddit urls:\n") + cat("Collecting thread data for reddit urls...\n") # make the get request for the reddit thread url threads_df <- RedditExtractoR::reddit_content(threadUrls, waitTime) @@ -36,13 +26,12 @@ CollectDataReddit <- function(threadUrls, waitTime = 5, writeToFile) { threads_df$thread_id <- gsub("^(.*)?/comments/([0-9A-Za-z]{6})?/.*?(/)?$", "\\2", threads_df$URL, ignore.case = TRUE, perl = TRUE) - if (isTrueValue(writeToFile)) { - writeOutputFile(threads_df, "csv", "RedditData") - } + if (writeToFile) { writeOutputFile(threads_df, "csv", "RedditData") } class(threads_df) <- append(class(threads_df), c("dataSource", "reddit")) - cat("\nDone!\n") + cat("Done.\n") + flush.console() return(threads_df) } \ No newline at end of file diff --git a/vosonSML/R/CollectDataTwitter.R b/vosonSML/R/CollectDataTwitter.R index 1cf1d50..9b3d75a 100644 --- a/vosonSML/R/CollectDataTwitter.R +++ b/vosonSML/R/CollectDataTwitter.R @@ -1,232 +1,99 @@ -#' Note: this function is DEPRECATED and will be removed in a future release. -#' Please use the \code{Collect} function +#' Note: this function is DEPRECATED. Please use the \code{\link{Collect}} function. #' #' Collect data from Twitter for generating different types of networks #' -#' This function collects data from Twitter based on hashtags or search terms, -#' and structures the data into a data frame of class -#' \code{dataSource.twitter}, ready for creating networks for further analysis. +#' This function collects data from Twitter based on hashtags or search terms, and structures the data into a data +#' frame of class \code{dataSource.twitter}, ready for creating networks for further analysis. #' -#' \code{CollectDataTwitter} collects public 'tweets' from Twitter using the -#' Twitter API. +#' \code{CollectDataTwitter} collects public 'tweets' from Twitter using the Twitter API. #' -#' The function then finds and maps the relationships of entities of interest -#' in the data (e.g. users, terms, hashtags), and structures these -#' relationships into a data frame format suitable for creating unimodal -#' networks (\code{CreateActorNetwork}), bimodal networks -#' (\code{CreateBimodalNetwork}), and semantic networks +#' The function then finds and maps the relationships of entities of interest in the data (e.g. users, terms, hashtags) +#' , and structures these relationships into a data frame format suitable for creating unimodal networks +#' (\code{CreateActorNetwork}), bimodal networks (\code{CreateBimodalNetwork}), and semantic networks #' (\code{CreateSemanticNetwork}). #' -#' The maximum number of tweets for a single call of \code{CollectDataTwitter} -#' is 1500. +#' The maximum number of tweets for a single call of \code{CollectDataTwitter} is 1500. #' -#' Language support is available, using the \code{language} argument. The user -#' can restrict tweets returned to a particular language, using the ISO 639-1 -#' code. For example, restricting to English would use \code{language="en"}. -#' The full list of codes is available here: -#' https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes. +#' Language support is available, using the \code{language} parameter. The user can restrict tweets returned to a +#' particular language, using the ISO 639-1 code. For example, restricting to English would use \code{language="en"}. 
+#' The full list of codes is available here: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes. #' -#' A variety of query operators are available through the Twitter API. For -#' example, "love OR hate" returns any tweets containing either term (or both). -#' For more information see the Twitter API documentation (under the heading +#' A variety of query operators are available through the Twitter API. For example, "love OR hate" returns any tweets +#' containing either term (or both). For more information see the Twitter API documentation (under the heading #' 'Query Operators'): https://dev.twitter.com/rest/public/search #' -#' @param searchTerm character string, specifying a search term or phrase (e.g. -#' "Australian politics") or hashtag (e.g. "#auspol"). Many query operators are -#' available - see the Twitter documentation for more information: +#' @param authToken Twitter oauth token created by rtweet. +#' @param searchTerm Character string. Specifies a search term or phrase (e.g. "Australian politics") or hashtag (e.g. +#' "#auspol"). Many query operators are available - see the Twitter documentation for more information: #' https://dev.twitter.com/rest/public/search -#' @param numTweets numeric integer, specifying how many tweets to be -#' collected. Defaults to 1500. Maximum tweets for a single call of this -#' function is 1500. -#' @param verbose logical. If \code{TRUE} then this function will output -#' runtime information to the console as it computes. Useful diagnostic tool -#' for long computations. Default is \code{FALSE}. -#' @param writeToFile logical. If \code{TRUE} then the data is saved to file in -#' current working directory (CSV format), with filename denoting current -#' system time and \code{searchTerm}. Default is \code{FALSE}. -#' @param language character string, restricting tweets to the given language, -#' given by an ISO 639-1 code. For example, "en" restricts to English tweets. -#' Defaults to NULL. -#' @param since If not NULL, restricts tweets to those since the given date. Date is to be formatted -#' as YYYY-MM-DD (this is a wrapper to the searchTwitter function in the twitteR package). -#' @param until If not NULL, restricts tweets to those up until the given date. Date is to be formatted -#' as YYYY-MM-DD (this is a wrapper to the searchTwitter function in the twitteR package). -#' @param locale If not NULL, will set the locale for the search. As of 03/06/11 only ja is effective, -#' as per the Twitter API (this is a wrapper to the searchTwitter function in the twitteR package). -#' @param geocode If not NULL, returns tweets by users located within a given radius of the given -#' latitude/longitude. (this is a wrapper to the searchTwitter function in the twitteR package). -#' @param sinceID If not NULL, returns tweets with IDs greater (ie newer) than the specified ID -#' (this is a wrapper to the searchTwitter function in the twitteR package). -#' @param maxID If not NULL, returns tweets with IDs smaller (ie older) than the specified ID -#' (this is a wrapper to the searchTwitter function in the twitteR package). -#' @param resultType If not NULL, returns filtered tweets as per value. See details for allowed values. -#' (this is a wrapper to the searchTwitter function in the twitteR package). -#' @param retryOnRateLimit If non-zero the search command will block retry up to X times if the rate limit -#' is experienced. 
This might lead to a much longer run time but the task will -#' eventually complete if the retry count is high enough (this is a wrapper to the searchTwitter -#' function in the twitteR package). -#' @return A data frame object of class \code{dataSource.twitter} that can be -#' used for creating unimodal networks (\code{CreateActorNetwork}), bimodal -#' networks (\code{CreateBimodalNetwork}), and semantic networks +#' @param searchType Character string. Returns filtered tweets as per search type \code{recent}, \code{mixed} or +#' \code{popular}. Default type is \code{recent}. +#' @param numTweets Numeric. Specifies how many tweets are to be collected. Default is \code{100}. +#' @param includeRetweets Logical. Specifies if the search results should include retweets. Default is \code{TRUE}. +#' @param retryOnRateLimit Logical. If \code{TRUE} the search will wait and retry when the twitter API rate limit is reached. Default is \code{FALSE}. +#' @param writeToFile Logical. If \code{TRUE} then the data is saved to file in current working directory (RDS format), +#' with filename denoting current system time and \code{searchTerm}. Default is \code{FALSE}. +#' @param verbose Logical. If \code{TRUE} then this function will output runtime information to the console as it +#' computes. Useful diagnostic tool for long computations. Default is \code{FALSE}. +#' @param ... Additional parameters to pass to the rtweet \code{search_tweets} function. +#' +#' @return A data frame object of class \code{dataSource.twitter} that can be used for creating unimodal networks +#' (\code{CreateActorNetwork}), bimodal networks (\code{CreateBimodalNetwork}), and semantic networks #' (\code{CreateSemanticNetwork}). -#' @note Data generated using this function is *not* suitable for dynamic -#' networks. Dynamic Twitter networks are not currently implemented in the -#' vosonSML package. This will be implemented in a future release. -#' @author Timothy Graham & Robert Ackland -#' -#' @seealso \code{AuthenticateWithTwitterAPI} must be run first or no data will 
-#' @keywords twitter data mining SNA -#' @examples -#' -#' \dontrun{ -#' # Firstly specify your API credentials -#' my_api_key <- "1234567890qwerty" -#' my_api_secret <- "1234567890qwerty" -#' my_access_token <- "1234567890qwerty" -#' my_access_token_secret <- "1234567890qwerty" -#' -#' # Authenticate with the Twitter API using \code{AuthenticateWithTwitterAPI} -#' AuthenticateWithTwitterAPI(api_key=my_api_key, api_secret=my_api_secret, -#' access_token=my_access_token, access_token_secret=my_access_token_secret) -#' -#' # Collect tweets data using \code{myTwitterData} -#' myTwitterData <- CollectDataTwitter(searchTerm="#auspol", -#' numTweets=150,writeToFile=FALSE,verbose=FALSE) -#' -#' # Create an 'actor' network using \code{CreateActorNetwork} -#' g_actor_twitter <- CreateActorNetwork(myTwitterData) -#' -#' # Create a 'bimodal' network using \code{CreateBimodalNetwork} -#' g_bimodal_twitter <- CreateBimodalNetwork(myTwitterData) -#' -#' # Create a 'semantic' network using \code{CreateSemanticNetwork} -#' g_semantic_twitter <- CreateSemanticNetwork(myTwitterData) -#' } -#' @export -CollectDataTwitter <- -function(searchTerm, numTweets, verbose, writeToFile, language, since, until, - locale, geocode, sinceID, maxID, resultType, retryOnRateLimit) { - -# cat(paste("DEBUG - numTweets is set to:", numTweets)) # DEBUG - - # handle the arguments - - if (missing(verbose)) { - verbose <- TRUE # default to verbose - } - - if (missing(language)) { - language <- NULL # default to NULL (as per 'twitteR' package default) - } - - if (missing(writeToFile)) { - writeToFile <- FALSE # default = not write to file - } - - if (verbose=="TRUE" | verbose=="true" | verbose=="T" | verbose==TRUE) { - verbose <- TRUE - } - else {verbose <- FALSE} - - if (missing(numTweets)) { - numTweets <- 1500 # default to 1500 max tweets - } - - # Ensure that argument `pageName` has been specified by user. - - if (missing(searchTerm)) { - cat("Error. Argument `searchTerm` is missing.\nPlease specify a search term or hashtag to collect data from.\n") - return(NA) - } - - if (missing(since)) { - since <- NULL # default to NULL (as per 'twitteR' package default) - } - - if (missing(until)) { - until <- NULL # default to NULL (as per 'twitteR' package default) - } - - if (missing(locale)) { - locale <- NULL # default to NULL (as per 'twitteR' package default) - } - - if (missing(geocode)) { - geocode <- NULL # default to NULL (as per 'twitteR' package default) - } - - if (missing(sinceID)) { - sinceID <- NULL # default to NULL (as per 'twitteR' package default) - } - - if (missing(maxID)) { - maxID <- NULL # default to NULL (as per 'twitteR' package default) - } - - if (missing(resultType)) { - resultType <- NULL # default to NULL (as per 'twitteR' package default) - } - - if (missing(retryOnRateLimit)) { - retryOnRateLimit <- 0 # default to NULL (as per 'twitteR' package default) - } - - # Start data collection -if (verbose) { - cat(paste("Now retrieving data based on search term: ",searchTerm,"\n",sep="")) +#' +#' @note Supported network types: \code{actor}, \code{bimodal}, \code{semantic} +#' +#' Data generated using this function is *not* suitable for dynamic networks. +#' +#' @seealso \code{Collect} +#' @keywords collect twitter +#' +CollectDataTwitter <- function(authToken = NULL, searchTerm = "", searchType = "recent", numTweets = 100, + includeRetweets = TRUE, retryOnRateLimit = FALSE, writeToFile = FALSE, + verbose = FALSE, ...) { + + if (!("Token" %in% class(authToken))) { + stop("OAuth token missing. 
Please use the Authenticate function to create and supply a token.\n", + call. = FALSE) + } + + searchTerm <- trimws(searchTerm) + cat(paste0("Collecting tweets", ifelse(searchTerm == "", "", paste0(" for search term: ", searchTerm)), "...\n")) flush.console() -} - # Collecting tweets based on hashtag / keyword - - tweetsData <- searchTwitter(searchTerm, n=numTweets, lang=language, since=since, until=until, - locale=locale, geocode=geocode, sinceID=sinceID, maxID=maxID, resultType=resultType, retryOnRateLimit=retryOnRateLimit) #1500 is max - - # Convert this data into a dataframe object, for ease of use - if (verbose) { - cat("Done\n") ### DEBUG - flush.console() - cat("Cleaning and sorting the data...\n") - } - df <- twListToDF(tweetsData) # a better way - - # rename metadata - names.twitteR <- c("screenName", "created") # change from - names.api <- c("screen_name", "created_at") # change to - for(name in names.twitteR) { - names(df)[which(names(df)==name)] <- names.api[which(names.twitteR==name)] - } - df$from_user <- df$screen_name - - # removing odd characters - df <- RemoveOddChars(df) - - # extract user info and add to df - df <- ExtractUserInfo(df) - - # extract HASHTAG info and add to df - df <- ExtractHashtagInfo(df) - if (verbose) { - cat("Done\n") ### DEBUG - flush.console() - } - ################################################ - - if (writeToFile=="TRUE" | writeToFile=="true" | writeToFile=="T" | writeToFile==TRUE) { - tweetsDataDF <- twListToDF(tweetsData) # we just want the original tweets data - currTime <- format(Sys.time(), "%b_%d_%X_%Y_%Z") - currTime <- gsub(":","_",currTime) - write.csv(tweetsDataDF,paste0(currTime,"_",searchTerm,"_TwitterData.csv")) - cat("Twitter data was written to current working directory, with filename:\n") - cat(paste0(currTime,"_",searchTerm,"_TwitterData.csv")) - } - - class(df) <- append(class(df),c("dataSource","twitter")) - - cat("\n") - - return(df) - - ################################################ - + + rtlimit <- rtweet::rate_limit(authToken, "search/tweets") + remaining <- rtlimit[["remaining"]] * 100 + if (retryOnRateLimit == TRUE & numTweets < remaining) { + cat(paste0("Requested ", numTweets, " tweets of ", remaining, " in this rate limit.\n")) + cat("Less tweets requested than remaining limit retryOnRateLimit set to FALSE.\n") + retryOnRateLimit <- FALSE + } + + search_params <- list() + search_params[['token']] <- authToken + + search_params['q'] <- searchTerm + search_params['type'] <- searchType + search_params['n'] <- numTweets + search_params['include_rts'] <- includeRetweets + search_params['retryonratelimit'] <- retryOnRateLimit + search_params['verbose'] <- verbose + + # additional twitter api params + dots <- substitute(...()) + search_params[['...']] <- dots + + tweets_df <- do.call(rtweet::search_tweets, search_params) + + cat(paste0("Collected ", nrow(tweets_df), " tweets.\n")) + + # rds chosen over csv to avoid flattening lists in the data + if (writeToFile) { writeOutputFile(tweets_df, "rds", "TwitterData") } + + cat("Done.\n") + flush.console() + + class(tweets_df) <- append(class(tweets_df), c("dataSource", "twitter")) + + return(tweets_df) } diff --git a/vosonSML/R/collectDataYoutube.R b/vosonSML/R/CollectDataYoutube.R similarity index 88% rename from vosonSML/R/collectDataYoutube.R rename to vosonSML/R/CollectDataYoutube.R index 9201cf2..a4d3f15 100644 --- a/vosonSML/R/collectDataYoutube.R +++ b/vosonSML/R/CollectDataYoutube.R @@ -12,11 +12,11 @@ #' For multiple videos, the user may wish to use the function 
GetYoutubeVideoIDs, which creates a character #' vector of video IDs from a plain text file of YouTube video URLs, which can then be used for the videoIDs #' argument of the function CollectDataYoutube. -#' +#' +#' @param apiKey character string, specifying the Google Developer API Key used for authentication. #' @param videoIDs character vector, specifying one or more YouTube video IDs. For example, if the video URL is #' 'https://www.youtube.com/watch?v=W2GZFeYGU3s', then use videoIDs='W2GZFeYGU3s'. For multiple videos, the #' function GetYoutubeVideoIDs can be used to create a vector object suitable as input for videoIDs. -#' @param apiKeyYoutube character string, specifying the Google Developer API Key used for authentication. #' @param verbose logical. If TRUE then this function will output runtime information to the console as it #' computes. Useful diagnostic tool for long computations. Default is FALSE. #' @param writeToFile logical. If TRUE then the data is saved to file in current working directory (CSV format), @@ -25,7 +25,7 @@ #' *does not* take into account 'reply' comments (i.e. replies to top-level comments), therefore the total number of #' comments collected may be higher than maxComments. By default this function attempts to collect all comments. #' -#' @return A data frame object of class dataSource.youtube that can be used for creating unimodal networks +#' @return A dataframe object of class dataSource.youtube that can be used for creating unimodal networks #' (CreateActorNetwork). #' #' @note Currently supported network types: unimodal 'actor' network; CreateActorNetwork. @@ -41,42 +41,19 @@ #' comments, and one of these top-level comments has 5 'child' or reply comments, then the total number of comments #' collected will be equal to 15. Currently, the user must 'guesstimate' the maxResults value, to collect a #' number of comments in the order of what they require. -#' -#' @author Timothy Graham & Robert Ackland -#' @seealso Authenticate must be run first or no data will be collected. #' -#' @noRd -collectDataYoutube <- function(videoIDs, apiKeyYoutube, verbose = FALSE, writeToFile = FALSE, maxComments) { - - if (missing(verbose)) { - verbose <- FALSE # default to not verbose - } - - if (missing(maxComments)) { - maxComments <- 10000000000000 # some arbitrary very large number - } - - if (missing(writeToFile)) { - writeToFile <- FALSE - } - - if (isTrueValue(verbose)) { - verbose <- TRUE - } +CollectDataYoutube <- function(apiKey, videoIDs, verbose = FALSE, writeToFile = FALSE, + maxComments = 10000000000000) { - if (missing(apiKeyYoutube)) { - cat(paste0("Error. Argument `apiKeyYoutube` is missing. Please specify a valid API key to collect data (i.e. your", - " Google Developer API Key).\n")) - return(NA) + # maxComments defaults to an arbitrary very large number + + if (missing(videoIDs) || !is.vector(videoIDs) || length(videoIDs) < 1) { + stop("Please provide a vector of one or more youtube video ids.\n", call. = FALSE) } - - if (missing(videoIDs)) { - cat(paste0("Error. Argument `videoIDs` is missing.\nPlease specify a vector of video IDs to collect data from.\n", - "Hint: to do this you can use the `GetYoutubeVideoIDs` function in this package.")) - return(NA) + + if (missing(apiKey) || nchar(apiKey) < 1) { + stop("Please provide a valid youtube api key.\n", call. 
= FALSE) } - - apiKey <- apiKeyYoutube # to play nice with existing code # Start data collection @@ -106,9 +83,7 @@ collectDataYoutube <- function(videoIDs, apiKeyYoutube, verbose = FALSE, writeTo ## Make a dataframe out of the results - if (verbose) { - cat(paste0("\n** Creating data frame from threads of ", videoIDs[k], ".\n\n", sep = "")) - } + if (verbose) { cat(paste0("** Creating dataframe from threads of ", videoIDs[k], ".\n", sep = "")) } tempData <- lapply(rObj$data, function(x) { data.frame(Comment = x$snippet$topLevelComment$snippet$textDisplay, @@ -197,7 +172,7 @@ collectDataYoutube <- function(videoIDs, apiKeyYoutube, verbose = FALSE, writeTo cat(paste0("\n** Collected replies: ", total_replies, "\n", sep = "")) cat(paste0("** Total video comments: ", length(commentIDs) + total_replies, "\n", sep = "")) - cat("---------------------------------------------------------------\n\n") + cat("---------------------------------------------------------------\n") ############################## Combine comment threads and replies ############################# @@ -222,7 +197,7 @@ collectDataYoutube <- function(videoIDs, apiKeyYoutube, verbose = FALSE, writeTo } if (verbose) { - cat("\nCleaning and structuring data. Please be patient.\n") + cat("Cleaning and structuring data. Please be patient.\n") } ############################## Map relations between users into dataframe ############################# @@ -244,7 +219,7 @@ collectDataYoutube <- function(videoIDs, apiKeyYoutube, verbose = FALSE, writeTo usernamesCleaned <- escapeRegex(usernamesCleaned) # NEW WAY (OPTIMISED - better, faster, stronger...) - dataCombined$ReplyToAnotherUser <- searchCommentsForMentions(commentsTextCleaned, usernamesCleaned) + dataCombined$ReplyToAnotherUser <- SearchCommentsForMentions(commentsTextCleaned, usernamesCleaned) ## Map the comment replies within PARENT COMMENT THREADS into dataframe @@ -260,11 +235,10 @@ collectDataYoutube <- function(videoIDs, apiKeyYoutube, verbose = FALSE, writeTo } } - if (isTrueValue(writeToFile)) { - writeOutputFile(dataCombined, "csv", "YoutubeData") - } + if (writeToFile) { writeOutputFile(dataCombined, "csv", "YoutubeData") } - cat("\nDone!\n") + cat("Done.\n") + flush.console() ############################################################################# # return dataframe to environment @@ -277,7 +251,6 @@ collectDataYoutube <- function(videoIDs, apiKeyYoutube, verbose = FALSE, writeTo } ## Set up a class and methods/functions for scraping - yt_scraper <- setRefClass( "yt_scraper", fields = list( @@ -339,7 +312,7 @@ yt_scraper <- setRefClass( scrape_all = function(maxComments) { cat(paste0("** video Id: ", api_opts$videoId ,"\n", sep = "")) if (verbose) { - cat(paste0(" [results per page: ", api_opts$maxResults, " | max comments per video: ", maxComments, "]\n\n", + cat(paste0(" [results per page: ", api_opts$maxResults, " | max comments per video: ", maxComments, "]\n", sep = "")) } @@ -350,7 +323,7 @@ yt_scraper <- setRefClass( thread_count <- scrape() if (verbose) { - cat(paste0("-- Collected threads from page: ", thread_count, "\n\n", sep = "")) + cat(paste0("-- Collected threads from page: ", thread_count, "\n", sep = "")) } if (thread_count == 0 | length(data) > maxComments) { @@ -364,9 +337,7 @@ yt_scraper <- setRefClass( data <<- data[1:maxComments] } - if (verbose) { - cat(paste0("-- Done collecting threads.\n\n", sep = "")) - } + if (verbose) { cat(paste0("-- Done collecting threads.\n", sep = "")) } break } @@ -418,7 +389,7 @@ yt_scraper <- setRefClass( }) 
core_df <<- do.call("rbind", sub_data) } else { - message("\n`core_df` is already up to date.\n") + message("core_df is already up to date.\n") } } ) diff --git a/vosonSML/R/Create.R b/vosonSML/R/Create.R index 8b00e1e..10c1f22 100644 --- a/vosonSML/R/Create.R +++ b/vosonSML/R/Create.R @@ -1,50 +1,31 @@ #' Create networks from social media data #' -#' This function creates networks from social media data (i.e. from data frames of class \code{dataSource}. -#' \code{Create} is the final step of the \code{Authenticate}, \code{Collect}, \code{Create} workflow. This function is -#' a convenient UI wrapper to the core create*Network family of functions. +#' This function creates networks from social media data (i.e. collected from dataframes of class \code{social media}). +#' \code{Create} is the final step of the \code{Authenticate}, \code{Collect}, \code{Create} workflow. This function +#' is a wrapper for the Create*Network S3 methods. #' -#' Note: when creating Twitter networks, the user information can be collected separately using the -#' \code{\link{PopulateUserInfo}} function and stored into the network as vertex attributes (this involves additional -#' calls to the Twitter API). +#' @param dataSource Social media data collected using the \code{Collect} method. +#' @param type Character string. Type of network to be created, can be \code{actor}, \code{bimodal}, +#' \code{dynamic}, \code{semantic} or \code{ego}. +#' @param ... Additional parameters for network creation for appropriate \code{social media} and network \code{type}. +#' Refer to S3 methods \code{social media} type for default parameters. #' -#' @param dataSource a data frame of class \code{dataSource} -#' @param type character, type of network to be created, currently supports "actor", "bimodal", "dynamic", "semantic" -#' and "ego" -#' @param ... additional parameters for create*Network functions -#' @return an igraph graph object -#' -#' @author Chung-hong Chan -#' -#' @examples -#' \dontrun{ -#' require(magrittr) -#' -#' ## instagram ego network example -#' -#' my_app_id <- "123456789098765" -#' my_app_secret <- "abc123abc123abc123abc123abc123ab" -#' my_usernames <- c("senjohnmccain", "obama") -#' -#' my_ego_network <- Authenticate("instagram", appID = my_app_id, appSecret = my_app_secret) %>% -#' Collect(ego = TRUE, username = my_usernames) %>% Create -#' -#' ## youtube actor network example -#' -#' my_api_key <- "314159265358979qwerty" -#' my_video_ids <- c("W2GZFeYGU3s","mL27TAJGlWc") +#' @return Network data containing an igraph object. #' -#' my_actor_network <- Authenticate("youtube", apiKey = my_api_key) %>% -#' Collect(videoIDs = my_video_ids) %>% Create('actor') +#' @note When creating twitter networks, a network with additional user information can be generated using the +#' \code{\link{GraphUserInfoTwitter}} function. Additional calls can be made to the twitter API to get information +#' about users that were identified as nodes during network creation. +#' +#' @seealso \code{\link{CreateActorNetwork}}, \code{\link{CreateBimodalNetwork}}, \code{\link{CreateSemanticNetwork}} +#' @keywords create actor bimodal semantic network #' -#' } #' @export Create <- function(dataSource, type = "actor", ...) 
{ - + # if ego is in the class list if (inherits(dataSource, "ego")) { - return(CreateEgoNetworkFromData(dataSource)) ## you cannot create actor out of ego data + return(CreateEgoNetworkFromData(dataSource)) # you cannot create actor out of ego data } - + creator <- switch(tolower(type), actor = CreateActorNetwork, bimodal = CreateBimodalNetwork, @@ -52,9 +33,12 @@ Create <- function(dataSource, type = "actor", ...) { semantic = CreateSemanticNetwork, ego = CreateEgoNetworkFromData, stop("Unknown Type")) - - network_to_return <- creator(dataSource, ...) - class(network_to_return) <- append(class(network_to_return), c("vosonSML")) - - return(network_to_return) + + # call the method mapped to the network type, passing through any additional parameters + networkToReturn <- creator(dataSource, ...) + + # append the vosonSML class to the class attribute of the returned network + class(networkToReturn) <- append(class(networkToReturn), c("vosonSML")) + + return(networkToReturn) } diff --git a/vosonSML/R/CreateActorNetwork.R b/vosonSML/R/CreateActorNetwork.R index daf6e7b..971594e 100644 --- a/vosonSML/R/CreateActorNetwork.R +++ b/vosonSML/R/CreateActorNetwork.R @@ -1,46 +1,30 @@ -#' Create actor networks from social media data +#' Create an actor network from social media data #' -#' This function creates a unimodal 'actor' network from social media data (i.e. from data frames of class dataSource, -#' or for Twitter data it is also possible to provide a list of data frames). In this actor network, edges represent -#' relationships between actors of the same type (e.g. interactions between Twitter users). For example, with Twitter -#' data an interaction is defined as a 'mention' or 'reply' or 'retweet' from user i to user j, given 'tweet' m. With -#' YouTube comments, an interaction is defined as a 'reply' or 'mention' from user i to user j, given 'comment' m. +#' This function creates an actor network from social media data collected using the \code{Collect} method. Edges in +#' the network represent interactions or relationships between the actors. For example, with twitter data an +#' interaction is defined as a 'mention', 'reply' or 'retweet' from user i to user j, given 'tweet' m. With youtube +#' comments, an interaction is defined as a 'reply' from user i to user j, given 'comment' m. The resulting network is +#' returned as an igraph object. #' -#' This function creates a (weighted and directed) unimodal 'actor' network from a data frame of class dataSource -#' (which are created using the CollectData family of functions in the vosonSML package), or a list of Twitter data -#' frames collected using CollectDataTwitter function. +#' @param x Collected social media data with a \code{social media} class attribute. +#' @param ... Additional parameters to pass to the network creation method. +#' @param writeToFile Logical. Save network data to a file in the current working directory. Default is \code{FALSE}. #' -#' The resulting network is an igraph graph object. This graph object is unimodal because edges represent relationships -#' between vertices of the same type (read: actors), such as replies/retweets/mentions between Twitter users. Edges are -#' directed and weighted (e.g. if user i has replied n times to user j, then the weight of this directed edge equals n). +#' @seealso \code{\link{Create}} +#' @keywords create actor twitter youtube reddit #' -#' @param x a data frame of class dataSource. For Twitter data, it is also possible to provide a list of data frames -#' (i.e.
data frames that inherit class dataSource and twitter). Only lists of Twitter data frames are supported at -#' this time. If a list of data frames is provided, then the function binds these row-wise and computes over the entire -#' data set. -#' @param writeToFile logical. If TRUE then the network is saved to file in current working directory (GRAPHML format), -#' with filename denoting the current date/time and the type of network -#' @param ... additional parameters to pass to the network creation method -#' @return an igraph graph object, with directed and weighted edges -#' -#' @note Not all data sources in vosonSML can be used for creating actor networks. -#' Currently supported data sources are: YouTube, Twitter -#' -#' Other data sources (e.g. Facebook) will be implemented in the future. The user is notified if they try to create -#' actor networks for incompatible data sources. -#' -#' For Twitter data, actor networks can be created from multiple data frames (i.e. datasets collected individually -#' using CollectDataTwitter). Simply create a list of the data frames that you wish to create a network from. For -#' example: my_list <- list(my_twitter_data_1, my_twitter_data_2, my_twitter_data_3) -#' -#' @author Timothy Graham , Robert Ackland -#' -#' @noRd -CreateActorNetwork <- function(x, writeToFile, ...) { +#' @export +CreateActorNetwork <- function(x, ...) { + # searches the class list of x for matching method + UseMethod("CreateActorNetwork", x) +} - if (missing(writeToFile)) { - writeToFile <- FALSE +#' @rdname CreateActorNetwork +#' @export +CreateActorNetwork.default <- function(x, ...) { + cat("Cannot create actor network using this type of data.\n") + + if (inherits(x, "temporal")) { + cat("The data you supplied is temporal. Please use the CreateDynamicNetwork function for temporal data.\n") } - - UseMethod("CreateActorNetwork", x) } diff --git a/vosonSML/R/CreateActorNetwork.default.R b/vosonSML/R/CreateActorNetwork.default.R deleted file mode 100644 index ff04e88..0000000 --- a/vosonSML/R/CreateActorNetwork.default.R +++ /dev/null @@ -1,11 +0,0 @@ -CreateActorNetwork.default <- -function(x,writeToFile) - { - if (missing(writeToFile)) { - writeToFile <- FALSE # default = not write to file - } - cat("Error. Cannot create actor network using this type of data (see help file for data types and sources).\n") - if (inherits(x,"temporal")) { - cat("(The data you supplied is temporal. Please use the `CreateDynamicNetwork` function for temporal data.)\n") - } - } diff --git a/vosonSML/R/CreateActorNetwork.reddit.R b/vosonSML/R/CreateActorNetwork.reddit.R index aac8d77..e8d3d1f 100644 --- a/vosonSML/R/CreateActorNetwork.reddit.R +++ b/vosonSML/R/CreateActorNetwork.reddit.R @@ -1,49 +1,30 @@ -#' Creates a reddit actor network from collected threads -#' -#' Uses RedditExtractoR::user_network to create an igraph directed actor network with comment ids as edge attribute. -#' -#' @param x a dataframe as vosonSML class object containing collected social network data -#' @param weightEdges logical. Combines and weights directed edges. Can't be used with includeTextData. -#' @param includeTextData logical. If the igraph network edges should include the comment text as attribute. -#' @param cleanText logical. If non-alphanumeric, non-punctuation, and non-space characters should be removed from the -#' included text attribute data. Default is TRUE -#' @param writeToFile logical. If the igraph network graph should be written to file. 
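# A self-contained illustration of the S3 dispatch used by CreateActorNetwork()
# above: UseMethod() picks the method matching the object's class vector and
# falls back to the .default method. The names below are examples, not package code.
NetworkExample <- function(x, ...) UseMethod("NetworkExample", x)
NetworkExample.default <- function(x, ...) cat("Cannot create a network from this type of data.\n")
NetworkExample.reddit <- function(x, ...) cat("Creating a reddit actor network...\n")

mockData <- data.frame(comment = "hello", stringsAsFactors = FALSE)
class(mockData) <- append(class(mockData), "reddit")
NetworkExample(mockData)           # dispatches to NetworkExample.reddit
NetworkExample(data.frame(x = 1))  # falls back to NetworkExample.default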
-#' -#' @note Can create three types of network graphs: -#' * Directed graph with subreddit, thread_ids and comment ids as edge attributes - default option -#' * Directed graph with weighted edges (without comment ids) - weightEdges = TRUE -#' * Directed graph with comment text included as edge attribute - includeTextData = TRUE +# Creates a reddit actor network +# +# Uses RedditExtractoR::user_network to create an igraph directed actor network with comment ids as +# edge attributes. +# +#' @param weightEdges Logical. Combines and weights directed network edges. Default is \code{FALSE}. +#' @param textData Logical. If the igraph network should include the comment text as an edge attribute. +#' Cannot be used with the \code{weightEdges} parameter. Default is \code{FALSE}. +#' @param cleanText Logical. If non-alphanumeric, non-punctuation, and non-space characters should be removed from the +#' included text attribute data. Only applies if \code{textData = TRUE}. Default is \code{TRUE}. #' -#' Comment ids as edge attributes in graphs refer to the Collect dataframe comment id not reddits comment id -#' If "Forbidden control character 0x19 found in igraph_i_xml_escape, Invalid value" then set cleanText = TRUE +#' @return A reddit actor network as igraph object. #' -#' @return an igraph object of the actor network -#' -CreateActorNetwork.reddit <- function(x, weightEdges, includeTextData, cleanText, writeToFile) { +#' @rdname CreateActorNetwork +#' @export +CreateActorNetwork.reddit <- function(x, weightEdges = FALSE, textData = FALSE, cleanText = TRUE, + writeToFile = FALSE, ...) { - if (missing(writeToFile) || writeToFile != TRUE) { - writeToFile <- FALSE - } - - if (missing(weightEdges) || weightEdges != TRUE) { - weightEdges <- FALSE - } + # default cleanText = TRUE as reddit comments often contain forbidden XML control characters - # if weightEdges then includeTextData set FALSE - if (missing(includeTextData) || includeTextData != TRUE || weightEdges == TRUE) { - includeTextData <- FALSE - } + # if weightEdges then textData set FALSE + if (weightEdges) { textData <- FALSE } - # default cleanText = TRUE as reddit comments often contain forbidden XML control characters - if (missing(cleanText) || cleanText != FALSE) { - cleanText <- TRUE - } else { - cleanText <- FALSE - } + if (textData == FALSE) { cleanText <- FALSE } - if (includeTextData == FALSE) { - cleanText <- FALSE - } + cat("Generating reddit actor network...\n") + flush.console() # append string to file name to indicate different graph types, only used if writeToFile = TRUE appendToName <- "" @@ -55,17 +36,30 @@ CreateActorNetwork.reddit <- function(x, weightEdges, includeTextData, cleanText # modified from RedditExtractoR::user_network to include the df comment id, subreddit and thread id as edge # attributes to support post-processing. author of sender_receiver_df, node_df, and edge_df @ivan-rivera. 
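# A stand-alone illustration of the reddit 'structure' handling in the pipeline
# below: a comment with structure id "1_1_2" responds to "1_1", while top-level
# comments (no underscore) respond to nothing. The ids here are made up.
structure_ids <- c("1", "1_1", "1_1_2", "4_2")
response_to <- ifelse(!grepl("_", structure_ids), "",
                      gsub("_\\d+$", "", structure_ids))
response_to  # ""  "1"  "1_1"  "4"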
include_author <- TRUE - sender_receiver_df <- - thread_df %>% + + # select cols and rename id and user + sender_receiver_df <- thread_df %>% dplyr::select(.data$id, .data$subreddit, .data$thread_id, .data$structure, .data$user, .data$author, .data$comment) %>% - dplyr::rename("comment_id" = .data$id, "sender" = .data$user) %>% + dplyr::rename("comment_id" = .data$id, "sender" = .data$user) + + sender_receiver_df %<>% + # response_to = "" if structure doesnt have underscore in it + # else structure minus last digit '1_1_2' response_to = '1_1' dplyr::mutate(response_to = ifelse(!grepl("_", .data$structure), "", gsub("_\\d+$", "", .data$structure))) %>% + + # select structure and user from original df + # rename structure to response_to and user to receiver + # left join sender_receiver_df to response_to, receiver by response_to dplyr::left_join(thread_df %>% dplyr::select(.data$structure, .data$user) %>% dplyr::rename("response_to" = .data$structure, "receiver" = .data$user), - by = "response_to") %>% + by = "response_to") + + sender_receiver_df %<>% + # inserts author into missing receiver values dplyr::mutate(receiver = dplyr::coalesce(.data$receiver, ifelse(include_author, .data$author, ""))) %>% + # filter out when sender and receiver same, or if either deleted or empty string dplyr::filter(.data$sender != .data$receiver, !(.data$sender %in% c("[deleted]", "")), !(.data$receiver %in% c("[deleted]", ""))) %>% @@ -93,22 +87,24 @@ CreateActorNetwork.reddit <- function(x, weightEdges, includeTextData, cleanText # weight edges network graph if (weightEdges) { + # drop comment id and text edge_df$comment_id <- edge_df$title <- NULL - edge_df <- edge_df %>% dplyr::group_by(.data$from, .data$to) %>% - dplyr::summarise(weight = sum(.data$weight)) %>% dplyr::ungroup() + edge_df %<>% dplyr::group_by(.data$from, .data$to) %>% + dplyr::summarise(weight = sum(.data$weight)) %>% dplyr::ungroup() appendToName <- "Weighted" # include comment text as edge attribute network graph - } else if (includeTextData) { + } else if (textData) { edge_df$weight <- NULL # rename the edge attribute containing the thread comment - edge_df <- edge_df %>% dplyr::rename("vosonTxt_comment" = .data$title) + edge_df %<>% dplyr::rename("vosonTxt_comment" = .data$title) # problem control characters encountered in reddit text # edge_df$vosonTxt_comment <- gsub("[\x01\x05\x18\x19\x1C]", "", edge_df$vosonTxt_comment, perl = TRUE) appendToName <- "Txt" + # remove any characters that are not in punctuation, alphanumeric classes or spaces if (cleanText) { edge_df$vosonTxt_comment <- gsub("[^[:punct:]^[:alnum:]^\\s]", "", edge_df$vosonTxt_comment, perl = TRUE) appendToName <- "CleanTxt" @@ -129,7 +125,7 @@ CreateActorNetwork.reddit <- function(x, weightEdges, includeTextData, cleanText writeOutputFile(g, "graphml", name) } - cat("\nDone!\n") + cat("Done.\n") flush.console() return(g) diff --git a/vosonSML/R/CreateActorNetwork.twitter.R b/vosonSML/R/CreateActorNetwork.twitter.R index 200dd59..7e2afe7 100644 --- a/vosonSML/R/CreateActorNetwork.twitter.R +++ b/vosonSML/R/CreateActorNetwork.twitter.R @@ -1,230 +1,191 @@ +# Create twitter actor network +# +# Creates an actor network from collected tweets. +# +#' @param verbose Logical. Output additional information about the network creation. Default is \code{FALSE}. +#' +#' @note For twitter data, actor networks can be created from multiple data frames (i.e. datasets collected individually +#' using \code{Collect} method. 
Simply create a list of the data frames that you wish to create a network from. +#' For example, \code{myList <- list(myTwitterData1, myTwitterData2, myTwitterData3)} +#' +#' @return A twitter actor network as list containing a relations dataframe, users dataframe and igraph object. +#' +#' @rdname CreateActorNetwork #' @export -CreateActorNetwork.twitter <- -function(x,writeToFile) -{ - - from=retweet_from=to=edgeType=timeStamp=tweet_id=users_mentioned=reply_to=NULL # to please the gods of R CMD CHECK - - if (missing(writeToFile)) { - writeToFile <- FALSE # default = not write to file - } - - df <- x # match the variable names (this must be used to avoid warnings in package compilation?) +CreateActorNetwork.twitter <- function(x, writeToFile = FALSE, verbose = FALSE, ...) { - # if `df` is a list of dataframes, then need to convert these into one dataframe - # CURRENTLY NOT IMPLEMENTED - there is no method for lists yet. - # suppressWarnings( - # if (class(df)=="list") { - # df <- do.call("rbind", df) - # } - # ) - - # The `hashtags_used` column in `df` causes problems for creating actor network, so delete it: - df <- df[,-21] - - # clear any odd characters - # df <- removeOddChars(df) - - # convert df to data.table + from <- to <- edge_type <- timestamp <- status_id <- NULL + is_retweet <- is_quote <- mentions_user_id <- reply_to_user_id <- NULL + + df <- x df <- data.table(df) - - # Now create the dfActorNetwork1, a dataframe of relations between users - cat("Generating the network...\n") ### DEBUG - flush.console() - - # for speed we will pre-allocate `dataCombined` to a very large size (more rows than needed) - # and after everything is finished we will delete the unused rows - - dataCombined <- data.table( - from = as.character(c(rep("NA_f00",20000000))), - to = as.character(c(rep("NA_f00",20000000))), - edgeType = as.character(c(rep("NA_f00",20000000))), - timeStamp = as.character(c(rep("NA_f00",20000000))), - tweet_id = as.character(c(rep("NA_f00",20000000))) - ) - - setkey(dataCombined,from) # set the key value of the data table - - nextEmptyRow <- 1 # so we can update rows in `dataCombined` in a relatively efficient way - - # We firstly do the retweet data - for (i in 1:nrow(df)) { - - if (is.na(df[i,retweet_from][[1]])) {next} # we check if there are retweets, if not skip to next row - - # nextEmptyRow <- dataCombined[ , .I[from_userID=="NA_f00"] ][1] # we get index of the next 'empty' row to put data into # NOT NEEDED NOW, BUT USEFUL FOR LATER - - dataCombined[nextEmptyRow, from:= as.character(df$from_user[i][[1]])] - dataCombined[nextEmptyRow, to := as.character(df$retweet_from[i][[1]])] - dataCombined[nextEmptyRow, edgeType := as.character("Retweet")] - dataCombined[nextEmptyRow, timeStamp := as.character(df$created_at[i][[1]])] - dataCombined[nextEmptyRow, tweet_id := as.character(df$id[i][[1]])] - - nextEmptyRow <- nextEmptyRow + 1 # increment the row to update in `dataCombined` - - } - - # Next we do the mentions - for (i in 1:nrow(df)) { - - if (length(df[i,users_mentioned][[1]]) < 1) {next} # we check if there are likes, if not skip to next row - - for (j in 1:length(df$users_mentioned[i][[1]])){ # for each row of the likes data for post i - - # nextEmptyRow <- dataCombined[ , .I[from_userID=="NA_f00"] ][1] # we get index of the next 'empty' row to put data into # NOT NEEDED NOW, BUT USEFUL FOR LATER - - dataCombined[nextEmptyRow, from := as.character(df$from_user[i][[1]])] - dataCombined[nextEmptyRow, to := as.character(df$users_mentioned[i][[1]][j])] - 
dataCombined[nextEmptyRow, edgeType := as.character("Mention")] - dataCombined[nextEmptyRow, timeStamp := as.character(df$created_at[i][[1]])] - dataCombined[nextEmptyRow, tweet_id := as.character(df$id[i][[1]])] - - nextEmptyRow <- nextEmptyRow + 1 # increment the row to update in `dataCombined` - + + df_stats <- networkStats(NULL, "collected tweets", nrow(df)) + + cat("Generating twitter actor network...\n") + flush.console() + + df_users <- data.frame("user_id" = character(0), "screen_name" = character(0)) + df_users <- rbind(df_users, subset(df, select = c("user_id", "screen_name"), stringsAsFactors = FALSE)) + + # for speed we will pre-allocate dataCombined to a very large size (more rows than needed) + # and after everything is finished we will delete the unused rows + dataCombined <- data.table( + from = as.character(c(rep("NA_f00", 20000000))), + to = as.character(c(rep("NA_f00", 20000000))), + edge_type = as.character(c(rep("NA_f00", 20000000))), #edgeType + timestamp = as.character(c(rep("NA_f00", 20000000))), # timeStamp + status_id = as.character(c(rep("NA_f00", 20000000))) # tweet_id + ) + + setkey(dataCombined, from) # set the key value of the data table + + nextEmptyRow <- 1 # so we can update rows in dataCombined in a relatively efficient way + + ## retweets + # this creates a retweet edge between: + # from (user retweeting) -- retweet --> to (user that tweeted) + count <- 0 + for (i in 1:nrow(df)) { + if ((df[i, is_retweet][[1]] == FALSE) || (is.na(df[i, is_retweet][[1]]))) { next } + + count <- count + 1 + + dataCombined[nextEmptyRow, from := as.character(df$user_id[i][[1]])] + dataCombined[nextEmptyRow, to := as.character(df$retweet_user_id[i][[1]])] + dataCombined[nextEmptyRow, edge_type := as.character("retweet")] + dataCombined[nextEmptyRow, timestamp := as.character(df$created_at[i][[1]])] + dataCombined[nextEmptyRow, status_id := as.character(df$status_id[i][[1]])] + + df_users <- rbind(df_users, list(df$retweet_user_id[i][[1]], df$retweet_screen_name[i][[1]])) + + nextEmptyRow <- nextEmptyRow + 1 + } + df_stats <- networkStats(df_stats, "retweets", count, TRUE) + + ## quotes + # this creates a quote edge between: + # from (user quoting) -- quote --> to (user being quoted) + count <- 0 + for (i in 1:nrow(df)) { + if ((df[i, is_quote][[1]] == FALSE) || (is.na(df[i, is_quote][[1]]))) { next } + + count <- count + 1 + + dataCombined[nextEmptyRow, from := as.character(df$user_id[i][[1]])] + dataCombined[nextEmptyRow, to := as.character(df$quoted_user_id[i][[1]])] + dataCombined[nextEmptyRow, edge_type := as.character("quote")] + dataCombined[nextEmptyRow, timestamp := as.character(df$created_at[i][[1]])] + dataCombined[nextEmptyRow, status_id := as.character(df$status_id[i][[1]])] + + df_users <- rbind(df_users, list(df$quoted_user_id[i][[1]], df$quoted_screen_name[i][[1]])) + + nextEmptyRow <- nextEmptyRow + 1 + } + df_stats <- networkStats(df_stats, "quoting others", count, TRUE) + + # dont create edges for mentions in retweets + # if user retweets and types own text with mentions it becomes a quote tweet + # and these are then counted + if_retweet_inlude_mentions <- FALSE + + ## mentions + # this creates a mention edge between: + # from (user tweeting) -- mention / reply mention --> to (user mentioned) + count <- 0 + mcount <- 0 + rmcount <- 0 + for (i in 1:nrow(df)) { + if ((length(df[i, mentions_user_id][[1]]) < 1) | + (length(df[i, mentions_user_id][[1]]) == 1 & is.na(df[i, mentions_user_id][[1]][[1]])) | + (if_retweet_inlude_mentions == FALSE & df[i, 
is_retweet][[1]] == TRUE)) { + next + } + + count <- count + 1 + + for (j in 1:length(df$mentions_user_id[i][[1]])) { # for each row of the likes data for post i + + etype <- "mention" + if (!is.na(df[i, reply_to_user_id][[1]])) { + # skip reply to actor as have this edge in replies + if (df[i, reply_to_user_id][[1]] == df$mentions_user_id[i][[1]][j]) { + next } - + etype <- "reply mention" + rmcount <- rmcount + 1 + } else { + mcount <- mcount + 1 } - - # Finally, we do the replies data - for (i in 1:nrow(df)) { - - if (is.na(df[i,reply_to][[1]])) {next} # we check if there are retweets, if not skip to next row - - # nextEmptyRow <- dataCombined[ , .I[from_userID=="NA_f00"] ][1] # we get index of the next 'empty' row to put data into # NOT NEEDED NOW, BUT USEFUL FOR LATER - - dataCombined[nextEmptyRow, from:= as.character(df$from_user[i][[1]])] - dataCombined[nextEmptyRow, to := as.character(df$reply_to[i][[1]])] - dataCombined[nextEmptyRow, edgeType := as.character("Reply")] - dataCombined[nextEmptyRow, timeStamp := as.character(df$created_at[i][[1]])] - dataCombined[nextEmptyRow, tweet_id := as.character(df$id[i][[1]])] - - nextEmptyRow <- nextEmptyRow + 1 # increment the row to update in `dataCombined` - - } - - # we now delete all the rows at the end of `dataCombined` that are unused - dataCombined <- dataCombined[edgeType != "NA_f00"] # we just keep the rows that are unchanged from the original dummy data values - - ## -------------------------------- - - # make a vector of all the unique actors in the network1 - # actorsNames <- unique(c(as.character(dataCombined$from),as.character(dataCombined$to))) - actorsNames <- unique(factor(c(as.character(unique(dataCombined$from)),as.character(unique(dataCombined$to))))) - -# -# # cat(actorsNames) # DEBUG -# -# # Retrieve all the user details (e.g. follower count, number of tweets, etc) and include as node attributes. -# # NOTE: Given API rate limits, the below implementation supports up to 7500 users overall in dataset (150 requests * 50 users per request). -# # NOTE: Future work needs to address the Twitter API rate limit for looking up user information (150 requests per 15 minutes). -# # NOTE: Requesting 50 users at a time seems to avoid rate limit errors (it's a safe bet...). -# -# # This function is supposed to perform the lookups in batches -# # and mind the rate limit: -# getUserObjects <- function(users) { -# groups <- split(users, ceiling(seq_along(users)/50)) -# userObjects <- ldply(groups, function(group) { # ldply is a very cool function, found in plyr package. -# objects <- lookupUsers(group) -# out <- twListToDF(objects) # twListToDF is also a handy function, found in twitteR package. Converts weird class object to data frame. -# # print("Waiting for 15 minutes (to 'refresh' the rate limit)...") # Don't need to use this yet. Implement later for number of users > 7500 (have to do chunked batches... chunks of chunks... 
urrghh) -# # Sys.sleep(900) -# return(out) -# }) -# return(userObjects) -# } -# -# # Putting it into action: -# usersInformationAttributes <- getUserObjects(actorsNames) -# actorsInfoDF <- usersInformationAttributes -# -# # Need to clean the user text collected here (get rid of odd characters): -# # actorsInfoDF <- RemoveOddCharsUserInfo(actorsInfoDF) # uses the new function in v2_munge_tweets.R -# -# # We sometimes have a PROBLEM of missing actors (no info could be retrieved for them - might be misspellings/errors/pun or joke, etc) -# # So, identify which users are missing from original set to retrieved set, -# # then ensure these users/connections are removed before proceeding onwards: -# -# missingActors <- setdiff(actorsNames,usersInformationAttributes$screenName) -# # NOTE: This is a horrible approach, need to optimise. -# missingTemp <- NULL # store the indexes of "offending" edge connections (i.e. bad/missing actors) -# # NOTE: Obviously the 'offending' users can only be found in the 2nd column -# # NOTE: Ipso facto, if they are not real/actual users, then they can't be the source of a directed edge -# -# for (i in 1:length(missingActors)) { -# missingTemp <- c(missingTemp, which(missingActors[i] == dataCombined$to)) -# } -# -# # REMOVE the offendors: -# if(length(missingTemp) > 0) { -# dataCombined <- dataCombined[-missingTemp,] -# } -# -# # REMOVE any duplicated usernames in the retrieved user information (NOT SURE HOW/WHY THIS WOULD OCCUR **NEED TO CHECK**): -# # duplicatedUsers <- which(duplicated(actorsInfoDF$screenName)) -# -# # if(length(duplicatedUsers) > 0) { -# # actorsInfoDF <- actorsInfoDF[-duplicatedUsers,] -# # } -# -# actors <- data.frame( -# name=actorsInfoDF$screenName, -# userDescription=actorsInfoDF$description, -# statusesCount=actorsInfoDF$statusesCount, -# followersCount=actorsInfoDF$followersCount, -# favoritesCount=actorsInfoDF$favoritesCount, -# friendsCount=actorsInfoDF$friendsCount, -# url=actorsInfoDF$url, -# realName=actorsInfoDF$name, -# dateAccountCreated=actorsInfoDF$created, -# userLocation=actorsInfoDF$location, -# userLanguage=actorsInfoDF$lang, -# numberOfListsUserIsFeaturedOn=actorsInfoDF$listedCount, -# profileImageUrl=actorsInfoDF$profileImageUrl -# ) -# -# # actors <- actors[-which(duplicated(actors$name)),] -# # actors <- unique(actors) -# -# # make a dataframe of the relations between actors -# # NOTE - FUTURE WORK: include edge attributes to specify the specific type of "mentions" (see previous comments on temporal network problem (see: approx. LINES 113-116)). 
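# A compact stand-alone sketch of the pre-allocate / fill-by-reference / trim
# pattern used for dataCombined in this function; the placeholder value "NA_f00",
# the ten rows and the two edges below are toy values only.
library(data.table)
edges <- data.table(from = rep("NA_f00", 10), to = rep("NA_f00", 10))
nextRow <- 1
for (pair in list(c("userA", "userB"), c("userA", "userC"))) {
  edges[nextRow, from := pair[1]]
  edges[nextRow, to := pair[2]]
  nextRow <- nextRow + 1
}
edges <- edges[from != "NA_f00"]  # keep only the rows that were filled in
edges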
-# # NOTE - For example, "RETWEET" versus "TWEET TO" (@username specified beginning of tweet) versus "MENTION" (@username specified somewhere else in tweet text) -# -# # return(df) # DEBUG - - relations <- data.frame( - from=dataCombined$from, - to=dataCombined$to, - edgeType=dataCombined$edgeType, - timeStamp=dataCombined$timeStamp, - tweet_id=dataCombined$tweet_id) - - ##### STEP FOUR ##### -# cat("\n I got to the final step before network generation") - - # convert into a graph - # note: suppressing warnings is used to avoid this error: - # In if (class(newval) == "factor") { : - # the condition has length > 1 and only the first element will be used - - suppressWarnings( - g <- graph.data.frame(relations, directed=TRUE, vertices=actorsNames) # used to be vertices=actors (when it collected user data) - ) - - # Make the node labels play nice with Gephi - V(g)$label <- V(g)$name - - if (writeToFile=="TRUE" | writeToFile=="true" | writeToFile=="T" | writeToFile==TRUE) { - # Output the final network to a graphml file, to import directly into Gephi - currTime <- format(Sys.time(), "%b_%d_%X_%Y_%Z") - currTime <- gsub(":","_",currTime) - write.graph(g,paste0(currTime,"_TwitterActorNetwork.graphml"),format="graphml") - cat("Twitter actor network was written to current working directory, with filename:\n") - cat(paste0(currTime,"_TwitterActorNetwork.graphml")) + + dataCombined[nextEmptyRow, from := as.character(df$user_id[i][[1]])] + dataCombined[nextEmptyRow, to := as.character(df$mentions_user_id[i][[1]][j])] + dataCombined[nextEmptyRow, edge_type := as.character(etype)] + dataCombined[nextEmptyRow, timestamp := as.character(df$created_at[i][[1]])] + dataCombined[nextEmptyRow, status_id := as.character(df$status_id[i][[1]])] + + df_users <- rbind(df_users, list(df$mentions_user_id[i][[1]][j], df$mentions_screen_name[i][[1]][j])) + + nextEmptyRow <- nextEmptyRow + 1 } - - cat("\nDone.\n") ### DEBUG - flush.console() - - return(g) - + } + df_stats <- networkStats(df_stats, "mentions", mcount, TRUE) + df_stats <- networkStats(df_stats, "reply mentions", rmcount, TRUE) + + ## replies + # this creates a reply edge between: + # from (user replying) -- reply --> to (user being replied to) + count <- 0 + for (i in 1:nrow(df)) { + if (is.na(df[i, reply_to_user_id][[1]])) { next } # we check if there are retweets, if not skip to next row - reply_to + + count <- count + 1 + + dataCombined[nextEmptyRow, from:= as.character(df$user_id[i][[1]])] + dataCombined[nextEmptyRow, to := as.character(df$reply_to_user_id[i][[1]])] + dataCombined[nextEmptyRow, edge_type := as.character("reply")] + dataCombined[nextEmptyRow, timestamp := as.character(df$created_at[i][[1]])] + dataCombined[nextEmptyRow, status_id := as.character(df$status_id[i][[1]])] + + df_users <- rbind(df_users, list(df$reply_to_user_id[i][[1]], df$reply_to_screen_name[i][[1]])) + + nextEmptyRow <- nextEmptyRow + 1 # increment the row to update in dataCombined + } + df_stats <- networkStats(df_stats, "replies", count, TRUE) + + dataCombined <- dataCombined[edge_type != "NA_f00"] + + # make a vector of all the unique actors in the network + df_users <- unique(df_users) + + df_stats <- networkStats(df_stats, "nodes", nrow(df_users)) + df_stats <- networkStats(df_stats, "edges", sum(df_stats$count[df_stats$edge_count == TRUE])) + + # print stats + if (verbose) { networkStats(df_stats, print = TRUE) } + + df_relations <- data.frame( + from = dataCombined$from, + to = dataCombined$to, + edge_type = dataCombined$edge_type, + timestamp = 
dataCombined$timestamp, + status_id = dataCombined$status_id) + + g <- graph.data.frame(df_relations, directed = TRUE, vertices = df_users) + + V(g)$screen_name <- ifelse(is.na(V(g)$screen_name), paste0("ID:", V(g)$name), V(g)$screen_name) + V(g)$label <- V(g)$screen_name + + if (writeToFile) { writeOutputFile(g, "graphml", "TwitterActorNetwork") } + + cat("Done.\n") + flush.console() + + function_output <- list( + "relations" = df_relations, + "users" = df_users, + "graph" = g + ) + + return(function_output) } diff --git a/vosonSML/R/CreateActorNetwork.youtube.R b/vosonSML/R/CreateActorNetwork.youtube.R index faed427..395ee8b 100644 --- a/vosonSML/R/CreateActorNetwork.youtube.R +++ b/vosonSML/R/CreateActorNetwork.youtube.R @@ -1,19 +1,12 @@ -#' Create YouTube Actor Network -#' -#' Creates a unimodal actor network based on comments and replies to one or more youtube videos. -#' -#' @param x dataframe containing comments data collected and structured by CollectDataYoutube. -#' @param writeToFile boolean, if TRUE then igraph data is saved to a file in the current working directory in -#' graphml format. The file name will contain the current system time. Default is FALSE. +# Create youtube actor network +# +# Creates a unimodal actor network based on comments and replies to one or more youtube videos. +# +#' @return A youtube actor network as igraph object. #' -#' @return igraph object containing the actor network with edge attribute comment id -#' -#' @noRd -CreateActorNetwork.youtube <- function(x, writeToFile) { - - if (missing(writeToFile)) { - writeToFile <- FALSE - } +#' @rdname CreateActorNetwork +#' @export +CreateActorNetwork.youtube <- function(x, writeToFile = FALSE, ...) { df_comments <- x # match the variable names to avoid warnings in package compilation @@ -22,12 +15,14 @@ CreateActorNetwork.youtube <- function(x, writeToFile) { # 2 User 5 PublishTime 8 ReplyToAnotherUser # 3 ReplyCount 6 CommentId 9 VideoID + cat("Generating youtube actor network...\n") + flush.console() + if (nrow(df_comments) == 0) { - cat(paste0("\nOops! There are no user comments to make a network from.\nPlease find video(s) where users have", - " commented on a video or to each other.\nReturning...\n")) - return() + stop(paste0("There are no user comments to make a network from, please check that the videos selected ", + "for collection have comments.\n"), call. = FALSE) } - + # direct comments which are not replies to others to a video id node # in the graph the video nodes will appear as VIDEO:AbCxYz where AbCxYz is the id not_replies <- which(df_comments$ReplyToAnotherUser == "FALSE" & df_comments$ParentID == "None") @@ -54,11 +49,9 @@ CreateActorNetwork.youtube <- function(x, writeToFile) { V(g)$label <- V(g)$name # output the final network to a graphml file - if (isTrueValue(writeToFile)) { - writeOutputFile(g, "graphml", "YoutubeActorNetwork") - } + if (writeToFile) { writeOutputFile(g, "graphml", "YoutubeActorNetwork") } - cat("\nDone!\n") + cat("Done.\n") flush.console() return(g) diff --git a/vosonSML/R/CreateBimodalNetwork.R b/vosonSML/R/CreateBimodalNetwork.R index 03b22e7..116f9c5 100644 --- a/vosonSML/R/CreateBimodalNetwork.R +++ b/vosonSML/R/CreateBimodalNetwork.R @@ -1,102 +1,56 @@ -#' Note: this function is DEPRECATED and will be removed in a future release. -#' Please use the \code{Create} function -#' #' Create bimodal networks from social media data #' -#' This function creates a bimodal network from social media data (i.e. 
from -#' data frames of class \code{dataSource}, or for Twitter data it is also -#' possible to provide a *list* of data frames), with edges representing -#' relationships between actors of two different types (e.g. Facebook users and -#' Facebook posts, with edges representing whether a user has commented or -#' 'liked' a post). -#' -#' This function creates a (directed and weighted) bimodal network from a data -#' frame of class \code{dataSource} (which are created using the `CollectData` -#' family of functions in the vosonSML package), or a *list* of Twitter -#' data frames collected using \code{CollectDataTwitter} function. -#' -#' The resulting network is an igraph graph object. This graph object is -#' bimodal because edges represent relationships between vertices of two -#' different types. For example, in a bimodal Facebook network, vertices -#' represent Facebook users or Facebook posts, and edges represent whether a -#' user has commented or 'liked' a post. Edges are directed and weighted (e.g. -#' if user i has commented n times on post j, then the weight of this directed -#' edge equals n). -#' -#' @param x a data frame of class \code{dataSource}. For Twitter data, it is -#' also possible to provide a *list* of data frames (i.e. data frames that -#' inherit class \code{dataSource} and \code{twitter}). Only lists of Twitter -#' data frames are supported at this time. If a list of data frames is -#' provided, then the function binds these row-wise and computes over the -#' entire data set. -#' @param writeToFile logical. If \code{TRUE} then the network is saved to file -#' in current working directory (GRAPHML format), with filename denoting the -#' current date/time and the type of network. -#' @param removeTermsOrHashtags character vector. Default is none. Otherwise -#' this argument specifies which terms or hashtags (i.e. vertices with matching -#' `name`) should be removed from the bimodal network. This is useful to remove -#' the search term or hashtag that was used to collect the data (i.e. remove -#' the corresponding vertex in the graph). For example, a value of "#auspol" -#' means that if there is a vertex with the exact name "#auspol" then this -#' vertex will be removed. +#' This function creates a bimodal network from social media data (i.e. from data frames of class \code{dataSource}, or +#' for Twitter data it is also possible to provide a *list* of data frames), with edges representing relationships +#' between actors of two different types (e.g. Facebook users and Facebook posts, with edges representing whether a +#' user has commented or 'liked' a post). +#' +#' This function creates a (directed and weighted) bimodal network from a data frame of class \code{dataSource} (which +#' are created using the 'CollectData' family of functions in the vosonSML package), or a *list* of Twitter data +#' frames collected using \code{CollectDataTwitter} function. +#' +#' The resulting network is an igraph graph object. This graph object is bimodal because edges represent relationships +#' between vertices of two different types. For example, in a bimodal Facebook network, vertices represent Facebook +#' users or Facebook posts, and edges represent whether a user has commented or 'liked' a post. Edges are directed and +#' weighted (e.g. if user i has commented n times on post j, then the weight of this directed edge equals n). +#' +#' @param x A data frame of class \code{dataSource}. For Twitter data, it is also possible to provide a *list* of data +#' frames (i.e. 
data frames that inherit class \code{dataSource} and \code{twitter}). Only lists of Twitter data +#' frames are supported at this time. If a list of data frames is provided, then the function binds these row-wise and +#' computes over the entire data set. +#' @param writeToFile Logical. If \code{TRUE} then the network is saved to file in current working directory (GRAPHML +#' format), with filename denoting the current date/time and the type of network. +#' @param removeTermsOrHashtags Character string. Default is none. Otherwise this argument specifies which terms or +#' hashtags (i.e. vertices with matching 'name') should be removed from the bimodal network. This is useful to remove +#' the search term or hashtag that was used to collect the data (i.e. remove the corresponding vertex in the graph). +#' For example, a value of "#auspol" means that if there is a vertex with the exact name "#auspol" then this vertex +#' will be removed. +#' @param ... Additional parameters to pass to the network creation method. +#' #' @return An igraph graph object, with weighted and directed edges. -#' @note Not all data sources in vosonSML can be used for creating -#' bimodal networks. -#' -#' Currently supported data sources are: -#' -#' - Facebook - Twitter -#' -#' Other data sources (e.g. YouTube) will be implemented in the future. -#' Additionally, the user is notified if they try to create bimodal networks -#' for incompatible data sources. -#' -#' For Twitter data, bimodal networks can be created from multiple data frames -#' (i.e. datasets collected individually using CollectDataTwitter). Simply -#' create a list of the data frames that you wish to create a network from. For -#' example, \code{myList <- list(myTwitterData1, myTwitterData2, -#' myTwitterData3)}. -#' @author Timothy Graham & Robert Ackland -#' -#' @seealso See \code{CollectDataFacebook} and \code{CollectDataTwitter} to -#' collect data for creating bimodal networks in vosonSML. +#' +#' @note Supported data sources: \code{facebook}, \code{twitter} +#' +#' For Twitter data, bimodal networks can be created from multiple data frames (i.e. datasets collected individually +#' using CollectDataTwitter). Simply create a list of the data frames that you wish to create a network from. For +#' example, \code{myList <- list(myTwitterData1, myTwitterData2, myTwitterData3)}. 
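# A self-contained sketch of the weighted, directed bimodal structure described
# above: users and hashtags as the two vertex types, with edge weight equal to
# the number of times a user tweeted the hashtag. Toy data, not collected tweets.
library(igraph)
relations <- data.frame(from = c("user1", "user1", "user2"),
                        to = c("#auspol", "#auspol", "#metoo"),
                        stringsAsFactors = FALSE)
g <- graph_from_data_frame(relations, directed = TRUE)
E(g)$weight <- 1
g <- simplify(g, edge.attr.comb = list(weight = "sum"))
E(g)$weight  # the two user1 -> #auspol edges collapse to one edge of weight 2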
+#' +#' @seealso \code{CollectDataFacebook}, \code{CollectDataTwitter} #' @keywords SNA bimodal network igraph social media -#' @examples -#' -#' \dontrun{ -#' ## This example shows how to collect Facebook page data and create a bimodal network -#' -#' # Use your own values for myAppID and myAppSecret -#' myAppID <- "123456789098765" -#' myAppSecret <- "abc123abc123abc123abc123abc123ab" -#' -#' # Authenticate with the Facebook API using `AuthenticateWithFacebookAPI` -#' fb_oauth <- AuthenticateWithFacebookAPI(appID=myAppID, appSecret=myAppSecret, -#' extended_permissions=FALSE, useCachedToken=TRUE) -#' -#' # Run the `CollectDataFacebook` function and store the results in variable `myFacebookData` -#' myFacebookData <- CollectDataFacebook(pageName="StarWars", rangeFrom="2014-05-15", -#' rangeTo="2014-06-03",writeToFile=FALSE,verbose=TRUE) -#' -#' # Create a 'bimodal' network using \code{CreateBimodalNetwork} -#' g_bimodal_facebook <- CreateBimodalNetwork(myFacebookData) -#' -#' # View descriptive information about the bimodal network -#' g_bimodal_facebook -#' } -#' -CreateBimodalNetwork <- -function(x,writeToFile,removeTermsOrHashtags) - { - if (missing(writeToFile)) { - writeToFile <- FALSE # default = not write to file - } - if (!missing(removeTermsOrHashtags)) { - removeTermsOrHashtags <- as.vector(removeTermsOrHashtags) #coerce to vector... to be sure - } - - if (missing(removeTermsOrHashtags)) { - removeTermsOrHashtags <- "foobar" - } - UseMethod("CreateBimodalNetwork",x) +#' +CreateBimodalNetwork <- function(x, writeToFile, removeTermsOrHashtags, ...) { + + if (missing(writeToFile)) { + writeToFile <- FALSE + } + + if (!missing(removeTermsOrHashtags)) { + removeTermsOrHashtags <- as.vector(removeTermsOrHashtags) # coerce to vector to be sure + } + + if (missing(removeTermsOrHashtags)) { + removeTermsOrHashtags <- "foobar" } + + UseMethod("CreateBimodalNetwork", x) +} diff --git a/vosonSML/R/CreateBimodalNetwork.default.R b/vosonSML/R/CreateBimodalNetwork.default.R index 7dbc3ed..7582f62 100644 --- a/vosonSML/R/CreateBimodalNetwork.default.R +++ b/vosonSML/R/CreateBimodalNetwork.default.R @@ -1,18 +1,20 @@ -CreateBimodalNetwork.default <- -function(x,writeToFile,removeTermsOrHashtags, ...) - { - if (missing(writeToFile)) { - writeToFile <- FALSE # default = not write to file - } - if (!missing(removeTermsOrHashtags)) { - removeTermsOrHashtags <- as.vector(removeTermsOrHashtags) #coerce to vector... to be sure - } - - if (missing(removeTermsOrHashtags)) { - removeTermsOrHashtags <- "foobar" - } - cat("Error. Cannot create bimodal network using this type of data (see help file for data types and sources).\n") - # if (inherits(x,"temporal")) { - # cat("(The data you supplied is temporal. Please use the `CreateDynamicNetwork` function for temporal data.)\n") - # } +CreateBimodalNetwork.default <- function(x, writeToFile, removeTermsOrHashtags, ...) { + + if (missing(writeToFile)) { + writeToFile <- FALSE } + + if (!missing(removeTermsOrHashtags)) { + removeTermsOrHashtags <- as.vector(removeTermsOrHashtags) # coerce to vector to be sure + } + + if (missing(removeTermsOrHashtags)) { + removeTermsOrHashtags <- "foobar" + } + + cat("Error. Cannot create bimodal network using this type of data (see help file for data types and sources).\n") + + # if (inherits(x, "temporal")) { + # cat("(The data you supplied is temporal. 
Please use the 'CreateDynamicNetwork' function for temporal data.)\n") + # } +} diff --git a/vosonSML/R/CreateBimodalNetwork.twitter.R b/vosonSML/R/CreateBimodalNetwork.twitter.R index dc380ad..7f15993 100644 --- a/vosonSML/R/CreateBimodalNetwork.twitter.R +++ b/vosonSML/R/CreateBimodalNetwork.twitter.R @@ -1,106 +1,108 @@ -#' @export -CreateBimodalNetwork.twitter <- -function(x,writeToFile,removeTermsOrHashtags) -{ - from=to=edgeType=timeStamp=tweet_id=NULL # to please the gods of R CMD CHECK - - if (missing(writeToFile)) { - writeToFile <- FALSE # default = not write to file - } - +CreateBimodalNetwork.twitter <- function(x, writeToFile = FALSE, removeTermsOrHashtags, verbose = FALSE) { + + from <- to <- edge_type <- timestamp <- status_id <- NULL + if (!missing(removeTermsOrHashtags)) { - removeTermsOrHashtags <- as.vector(removeTermsOrHashtags) #coerce to vector... to be sure + removeTermsOrHashtags <- as.vector(removeTermsOrHashtags) # coerce to vector to be sure } - + if (missing(removeTermsOrHashtags)) { removeTermsOrHashtags <- "#fake_hashtag_foobar42_1234567890" } - - df <- x # match the variable names (this must be used to avoid warnings in package compilation) - - # convert df to data.table + + df <- x df <- data.table(df) - # Now create the dfBimodalNetwork2, a dataframe of relations between users and hashtags (i.e. user i "tweeted" hashtag j) + df_stats <- networkStats(NULL, "collected tweets", nrow(df)) - print("Generating Twitter bimodal network...") ### DEBUG + # create dfBimodalNetwork2, a dataframe of relations between users and hashtags (i.e. user i "tweeted" hashtag j) + cat("Generating twitter bimodal network...\n") flush.console() - #### ----- NEW WAY --------- + df_entities <- data.table("entity_id" = character(0), "display_name" = character(0)) - # for speed we will pre-allocate `dataCombined` to a very large size (more rows than needed) + # for speed we will pre-allocate dataCombined to a very large size (more rows than needed) # and after everything is finished we will delete the unused rows - dataCombined <- data.table( - from = as.character(c(rep("NA_f00",20000000))), - to = as.character(c(rep("NA_f00",20000000))), - edgeType = as.character(c(rep("NA_f00",20000000))), - timeStamp = as.character(c(rep("NA_f00",20000000))), - tweet_id = as.character(c(rep("NA_f00",20000000))) + from = as.character(c(rep("NA_f00", 20000000))), + to = as.character(c(rep("NA_f00", 20000000))), + edge_type = as.character(c(rep("NA_f00", 20000000))), + timestamp = as.character(c(rep("NA_f00", 20000000))), + status_id = as.character(c(rep("NA_f00", 20000000))) ) - - setkey(dataCombined,from) # set the key value of the data table - - nextEmptyRow <- 1 # so we can update rows in `dataCombined` in a relatively efficient way - - # We only need to do the 'hashtag' data (currently) + + setkey(dataCombined, from) # set the key value of the data table + + nextEmptyRow <- 1 # so we can update rows in 'dataCombined' in a relatively efficient way + + # we only need to do the 'hashtag' data (currently) + count <- 0 + hashtag_count <- 0 for (i in 1:nrow(df)) { - - if (length(df$hashtags_used[[i]]) > 0) { # skip any rows where no hashtags were used - - for (j in 1:length(df$hashtags_used[[i]])) { # for each hashtag in list - - dataCombined[nextEmptyRow, from:= as.character(df$from_user[i][[1]])] - dataCombined[nextEmptyRow, to := as.character(df$hashtags_used[[i]][j])] - dataCombined[nextEmptyRow, edgeType := as.character("Used_hashtag")] - dataCombined[nextEmptyRow, timeStamp := 
as.character(df$created_at[i][[1]])] - dataCombined[nextEmptyRow, tweet_id := as.character(df$id[i][[1]])] - + if (length(df$hashtags[[i]]) > 0) { # skip any rows where no hashtags were used # hashtags_used + if (length(df$hashtags[[i]]) == 1 & is.na(df$hashtags[[i]][1])) { + next + } + + count <- count + 1 + df_entities <- rbind(df_entities, list(df$user_id[i][[1]], df$screen_name[i][[1]]), stringsAsFactors = FALSE) + + for (j in 1:length(df$hashtags[[i]])) { # for each hashtag in list + + tag <- paste0("#", df$hashtags[[i]][j]) + + dataCombined[nextEmptyRow, from:= as.character(df$user_id[i][[1]])] + dataCombined[nextEmptyRow, to := as.character(tag)] + dataCombined[nextEmptyRow, edge_type := as.character("hashtag")] + dataCombined[nextEmptyRow, timestamp := as.character(df$created_at[i][[1]])] + dataCombined[nextEmptyRow, status_id := as.character(df$status_id[i][[1]])] + + df_entities <- rbind(df_entities, list(tag, tag), stringsAsFactors = FALSE) + + hashtag_count = hashtag_count + 1 nextEmptyRow <- nextEmptyRow + 1 # increment the row to update in `dataCombined` - } } } - - # we now delete all the rows at the end of `dataCombined` that are unused - dataCombined <- dataCombined[edgeType != "NA_f00"] # we just keep the rows that are unchanged from the original dummy data values - - # make a vector of all the unique actors in the network1 - actorsNames <- unique(factor(c(as.character(unique(dataCombined$from)),as.character(unique(dataCombined$to))))) - + df_stats <- networkStats(df_stats, "tweets with hashtags", count, TRUE) + df_stats <- networkStats(df_stats, "hashtags", hashtag_count, TRUE) + + dataCombined <- dataCombined[edge_type != "NA_f00"] + + df_entities <- unique(df_entities) + + df_stats <- networkStats(df_stats, "nodes", nrow(df_entities)) + df_stats <- networkStats(df_stats, "edges", sum(df_stats$count[df_stats$edge_count == TRUE])) + if (verbose) { + networkStats(df_stats, print = TRUE) + } + relations <- data.frame( - from=dataCombined$from, - to=dataCombined$to, - edgeType=dataCombined$edgeType, - timeStamp=dataCombined$timeStamp, - tweet_id=dataCombined$tweet_id) - - suppressWarnings( - g <- graph.data.frame(relations, directed=TRUE, vertices=actorsNames) # used to be vertices=actors (when it collected user data) - ) - - # Make the node labels play nice with Gephi - V(g)$label <- V(g)$name - + from = dataCombined$from, + to = dataCombined$to, + edge_type = dataCombined$edge_type, + timestamp = dataCombined$timestamp, + status_id = dataCombined$status_id) + + g <- graph.data.frame(relations, directed = TRUE, vertices = df_entities) + + V(g)$display_name <- ifelse(is.na(V(g)$display_name), paste0("ID:", V(g)$name), V(g)$display_name) + # remove the search term / hashtags, if user specified it: - if (removeTermsOrHashtags[1]!="#fake_hashtag_foobar42_1234567890") { - toDel <- match(tolower(removeTermsOrHashtags),V(g)$name) # we force to lowercase because all terms/hashtags are already converted to lowercase - toDel <- toDel[!is.na(toDel)] # in case of user error (i.e. 
trying to delete terms/hashtags that don't exist in the data) - g <- delete.vertices(g, toDel) - } - - if (writeToFile=="TRUE" | writeToFile=="true" | writeToFile=="T" | writeToFile==TRUE) { - # Output the final network to a graphml file, to import directly into Gephi - currTime <- format(Sys.time(), "%b_%d_%X_%Y_%Z") - currTime <- gsub(":","_",currTime) - write.graph(g,paste0(currTime,"_TwitterBimodalNetwork.graphml"),format="graphml") - cat("Twitter bimodal network was written to current working directory, with filename:\n") - cat(paste0(currTime,"_TwitterBimodalNetwork.graphml")) + if (removeTermsOrHashtags[1] != "#fake_hashtag_foobar42_1234567890") { + # we force to lowercase because all terms/hashtags are already converted to lowercase + toDel <- match(tolower(removeTermsOrHashtags), V(g)$name) + # in case of user error (i.e. trying to delete terms/hashtags that don't exist in the data) + toDel <- toDel[!is.na(toDel)] + g <- delete.vertices(g, toDel) } - - cat("\nDone\n") ### DEBUG + + V(g)$label <- V(g)$display_name + + if (writeToFile) { writeOutputFile(g, "graphml", "TwitterBimodalNetwork") } + + cat("Done.\n") flush.console() - + return(g) - } diff --git a/vosonSML/R/CreateSemanticNetwork.R b/vosonSML/R/CreateSemanticNetwork.R index 4d6a7fa..1f46ed6 100644 --- a/vosonSML/R/CreateSemanticNetwork.R +++ b/vosonSML/R/CreateSemanticNetwork.R @@ -1,122 +1,80 @@ -#' Note: this function is DEPRECATED and will be removed in a future release. -#' Please use the \code{Create} function +#' Creates a semantic network from social media data (semantic relationships between concepts) #' -#' Create semantic networks from social media data (semantic relationships -#' between concepts) +#' This function creates a semantic network from social media data (i.e. from data frames of class \code{dataSource}, +#' or for Twitter data it is also possible to provide a list of data frames). In such semantic networks, concepts are +#' words/terms extracted from the text corpus of social media data (e.g. tweets on Twitter). #' -#' This function creates a semantic network from social media data (i.e. from -#' data frames of class \code{dataSource}, or for Twitter data it is also -#' possible to provide a list of data frames). In such semantic networks, -#' concepts are words/terms extracted from the text corpus of social media data -#' (e.g. tweets on Twitter). +#' This function creates a weighted network from a data frame of class \code{dataSource} (which are created using the +#' 'CollectData' family of functions in the vosonSML package), or a list of Twitter data frames collected using +#' \code{CollectDataTwitter} function. #' -#' This function creates a weighted network from a data frame of class -#' \code{dataSource} (which are created using the `CollectData` family of -#' functions in the vosonSML package), or a list of Twitter data frames -#' collected using \code{CollectDataTwitter} function. +#' The resulting semantic network is an igraph graph object. This graph object is semantic because vertices represent +#' unique concepts (in this case unique terms/words extracted from a social media text corpus), and edges represent +#' the co-occurrence of terms for all observations in the data set. For example, for a Twitter semantic network, +#' vertices represent either hashtags (e.g. "#auspol") or single terms ("politics"). If there are 1500 tweets in the +#' data set (i.e. 
1500 observations), and the term "#auspol" and the term "politics" appear together in every tweet, +#' then this will be represented by an edge with weight equal to 1500. #' -#' The resulting semantic network is an igraph graph object. This graph object -#' is semantic because vertices represent unique concepts (in this case unique -#' terms/words extracted from a social media text corpus), and edges represent -#' the co-occurrence of terms for all observations in the data set. For -#' example, for a Twitter semantic network, vertices represent either hashtags -#' (e.g. "#auspol") or single terms ("politics"). If there are 1500 tweets in -#' the data set (i.e. 1500 observations), and the term "#auspol" and the term -#' "politics" appear together in every tweet, then this will be represented by -#' an edge with weight equal to 1500. -#' -#' @param x a data frame of class \code{dataSource}. For Twitter data, it is -#' also possible to provide a *list* of data frames (i.e. data frames that -#' inherit class \code{dataSource} and \code{twitter}). Only lists of Twitter -#' data frames are supported at this time. If a list of data frames is -#' provided, then the function binds these row-wise and computes over the -#' entire data set. -#' @param writeToFile logical. If \code{TRUE} then the network is saved to file -#' in current working directory (GRAPHML format), with filename denoting the -#' current date/time and the type of network. -#' @param termFreq numeric integer, specifying the percentage of most frequent -#' TERMS to include. For example, a value of 20 means that the 20 percent most -#' frequently occurring terms will be included in the semantic network. The +#' @param x A data frame of class \code{dataSource}. For Twitter data, it is also possible to provide a *list* of data +#' frames (i.e. data frames that inherit class \code{dataSource} and \code{twitter}). Only lists of Twitter data +#' frames are supported at this time. If a list of data frames is provided, then the function binds these row-wise and +#' computes over the entire data set. +#' @param writeToFile Logical. If \code{TRUE} then the network is saved to file in current working directory (GRAPHML +#' format), with filename denoting the current date/time and the type of network. +#' @param termFreq Numeric integer. Specifies the percentage of most frequent TERMS to include. For example, a value +#' of 20 means that the 20 percent most frequently occurring terms will be included in the semantic network. The #' default value is 5, meaning the 5 percent most frequent terms are used. #' @param hashtagFreq ** NOT IMPLEMENTED YET - DEFAULTS TO ALL HASHTAGS **. -#' numeric integer, specifying the percentage of most frequent HASHTAGS to -#' include. For example, a value of 80 means that the 80 percent most frequently -#' occurring hashtags will be included in the semantic network. The default -#' value is 50, meaning the 50 percent most frequent hashtags are used. -#' @param removeTermsOrHashtags character vector. Default is none. Otherwise -#' this argument specifies which terms or hashtags (i.e. vertices with matching -#' `name`) should be removed from the semantic network. This is useful to -#' remove the search term or hashtag that was used to collect the data (i.e. -#' remove the corresponding vertex in the graph). For example, a value of -#' "#auspol" means that if there is a vertex with the name "#auspol" then this -#' vertex will be removed. -#' @param stopwordsEnglish logical. 
If \code{TRUE} then English stopwords are -#' removed from the tweets (e.g. words such as 'the' or 'and'). Using -#' \code{FALSE} may be helpful non-English data sets. The default is -#' \code{TRUE} (i.e. stopwords will be removed). +#' Numeric integer. Specifies the percentage of most frequent HASHTAGS to include. For example, a value of 80 means +#' that the 80 percent most frequently occurring hashtags will be included in the semantic network. The default value +#' is 50, meaning the 50 percent most frequent hashtags are used. +#' @param removeTermsOrHashtags Character string vector. Default is none. Otherwise this argument specifies which terms +#' or hashtags (i.e. vertices with matching 'name') should be removed from the semantic network. This is useful to +#' remove the search term or hashtag that was used to collect the data (i.e. remove the corresponding vertex in the +#' graph). For example, a value of "#auspol" means that if there is a vertex with the name "#auspol" then this vertex +#' will be removed. +#' @param stopwordsEnglish Logical. If \code{TRUE} then English stopwords are removed from the tweets (e.g. words such +#' as 'the' or 'and'). Using \code{FALSE} may be helpful non-English data sets. The default is \code{TRUE} (i.e. +#' stopwords will be removed). +#' @param ... Additional parameters to pass to the network creation method. +#' #' @return An igraph graph object, with weighted edges. -#' @note Not all data sources in vosonSML can be used for creating -#' semantic networks. -#' -#' Currently supported data sources are: -#' -#' - Twitter -#' -#' Other data sources (e.g. YouTube and Facebook) will be implemented in the -#' future. Additionally, the user is notified if they try to create semantic -#' networks for incompatible data sources. -#' -#' For Twitter data, semantic networks can be created from multiple data frames -#' (i.e. datasets collected individually using CollectDataTwitter). Simply -#' create a list of the data frames that you wish to create a network from. For -#' example, \code{myList <- list(myTwitterData1, myTwitterData2, -#' myTwitterData3)}. -#' @author Timothy Graham & Robert Ackland -#' -#' @seealso See \code{CollectDataTwitter} to collect data for creating semantic -#' networks in vosonSML. 
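# A self-contained sketch of the term co-occurrence weighting described above:
# every pair of terms appearing in the same tweet adds one to that edge's weight.
# The three term vectors below stand in for tweets that have already been tokenised.
library(igraph)
tweet_terms <- list(c("#auspol", "politics"),
                    c("#auspol", "politics"),
                    c("#auspol", "budget", "politics"))
pairs <- do.call(rbind, lapply(tweet_terms, function(terms) t(combn(terms, 2))))
g <- graph_from_data_frame(as.data.frame(pairs, stringsAsFactors = FALSE), directed = FALSE)
E(g)$weight <- 1
g <- simplify(g, edge.attr.comb = list(weight = "sum"))
as_data_frame(g)  # the "#auspol" -- "politics" edge has weight 3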
-#' @keywords SNA semantic network igraph social media -#' @examples -#' -#' \dontrun{ -#' ## This example shows how to collect Twitter data and create a semantic network -#' -#' # Firstly specify your API credentials -#' my_api_key <- "1234567890qwerty" -#' my_api_secret <- "1234567890qwerty" -#' my_access_token <- "1234567890qwerty" -#' my_access_token_secret <- "1234567890qwerty" -#' -#' # Authenticate with the Twitter API using \code{AuthenticateWithTwitterAPI} -#' AuthenticateWithTwitterAPI(api_key=my_api_key, api_secret=my_api_secret, -#' access_token=my_access_token, access_token_secret=my_access_token_secret) -#' -#' # Collect tweets data using \code{myTwitterData} -#' myTwitterData <- CollectDataTwitter(searchTerm="#auspol", -#' numTweets=200,writeToFile=FALSE,verbose=FALSE) -#' -#' # Create a 'semantic' network using \code{CreateSemanticNetwork} -#' g_semantic_twitter <- CreateSemanticNetwork(myTwitterData,writeToFile=FALSE, -#' termFreq=20,hashtagFreq=80) +#' +#' @note Currently supported data sources: +#' \itemize{ +#' \item \code{twitter} #' } #' -CreateSemanticNetwork <- -function(x,writeToFile,termFreq,hashtagFreq,removeTermsOrHashtags,stopwordsEnglish) - { - if (missing(writeToFile)) { - writeToFile <- FALSE # default = not write to file - } - if (missing(termFreq)) { - termFreq <- 5 # default to the top 5% most frequent terms. reduces size of graph. - } - if (missing(hashtagFreq)) { - hashtagFreq <- 50 # default to the top 50% hashtags. reduces size of graph. hashtags are 50% because they are much less frequent than terms. - } - if (missing(removeTermsOrHashtags)) { - removeTermsOrHashtags <- NA - } - if (missing(stopwordsEnglish)) { - stopwordsEnglish <- TRUE # default to true, because most English users will probably want this - } - UseMethod("CreateSemanticNetwork",x) - } +#' For Twitter data, semantic networks can be created from multiple data frames (i.e. datasets collected individually +#' using CollectDataTwitter). Simply create a list of the data frames that you wish to create a network from. For +#' example, \code{myList <- list(myTwitterData1, myTwitterData2, myTwitterData3)}. +#' +#' @seealso \code{CollectDataTwitter} +#' @keywords SNA semantic network igraph social media +#' +CreateSemanticNetwork <- function(x, writeToFile, termFreq, hashtagFreq, removeTermsOrHashtags, stopwordsEnglish, ...) { + + if (missing(writeToFile)) { + writeToFile <- FALSE + } + + if (missing(termFreq)) { + termFreq <- 5 # default to the top 5% most frequent terms. reduces size of graph. + } + + if (missing(hashtagFreq)) { + # default to the top 50% hashtags. reduces size of graph. hashtags are 50% because they are much less frequent + # than terms. + hashtagFreq <- 50 + } + + if (missing(removeTermsOrHashtags)) { + removeTermsOrHashtags <- NA + } + + if (missing(stopwordsEnglish)) { + stopwordsEnglish <- TRUE # default to true, because most English users will probably want this + } + + UseMethod("CreateSemanticNetwork", x) +} diff --git a/vosonSML/R/CreateSemanticNetwork.default.R b/vosonSML/R/CreateSemanticNetwork.default.R index de8a5f3..156560d 100644 --- a/vosonSML/R/CreateSemanticNetwork.default.R +++ b/vosonSML/R/CreateSemanticNetwork.default.R @@ -1,20 +1,26 @@ -CreateSemanticNetwork.default <- -function(x,writeToFile,termFreq,hashtagFreq,removeTermsOrHashtags) - { - if (missing(writeToFile)) { - writeToFile <- FALSE # default = not write to file - } - if (missing(termFreq)) { - termFreq <- 5 # default to the top 5% most frequent terms. reduces size of graph. 
- } - if (missing(hashtagFreq)) { - hashtagFreq <- 50 # default to the top 50% hashtags. reduces size of graph. hashtags are 50% because they are much less frequent than terms. - } - if (missing(removeTermsOrHashtags)) { - removeTermsOrHashtags <- NA - } - cat("Error. Cannot create semantic network using this type of data (see help file for data types and sources).\n") - if (inherits(x,"temporal")) { - cat("(The data you supplied is temporal. Please use the `CreateDynamicNetwork` function for temporal data.)\n") - } +CreateSemanticNetwork.default <- function(x, writeToFile, termFreq, hashtagFreq, removeTermsOrHashtags) { + + if (missing(writeToFile)) { + writeToFile <- FALSE # default = not write to file } + + if (missing(termFreq)) { + termFreq <- 5 # default to the top 5% most frequent terms. reduces size of graph. + } + + if (missing(hashtagFreq)) { + # default to the top 50% hashtags. reduces size of graph. hashtags are 50% because they are much less frequent + # than terms. + hashtagFreq <- 50 + } + + if (missing(removeTermsOrHashtags)) { + removeTermsOrHashtags <- NA + } + + cat("Error. Cannot create semantic network using this type of data (see help file for data types and sources).\n") + + if (inherits(x, "temporal")) { + cat("(The data you supplied is temporal. Please use the `CreateDynamicNetwork` function for temporal data.)\n") + } +} diff --git a/vosonSML/R/CreateSemanticNetwork.twitter.R b/vosonSML/R/CreateSemanticNetwork.twitter.R index 0e26f45..3e7c767 100644 --- a/vosonSML/R/CreateSemanticNetwork.twitter.R +++ b/vosonSML/R/CreateSemanticNetwork.twitter.R @@ -1,259 +1,240 @@ -#' @export -CreateSemanticNetwork.twitter <- -function(x,writeToFile,termFreq,hashtagFreq,removeTermsOrHashtags,stopwordsEnglish) -{ +CreateSemanticNetwork.twitter <- function(x, writeToFile = FALSE, termFreq = 5, hashtagFreq = 50, + removeTermsOrHashtags, stopwordsEnglish = TRUE, verbose = FALSE) { - if (missing(writeToFile)) { - writeToFile <- FALSE # default = not write to file - } - - if (missing(stopwordsEnglish)) { - stopwordsEnglish <- TRUE # default to true, because most English users will probably want this - } - - if (missing(termFreq)) { - termFreq <- 5 # default to the top 5% most frequent terms. reduces size of graph. - } - - if (missing(hashtagFreq)) { - hashtagFreq <- 50 # default to the top 50% hashtags. reduces size of graph. hashtags are 50% because they are much less frequent than terms. - } + # default to the top 5% most frequent terms. reduces size of graph + # default to the top 50% hashtags. reduces size of graph. hashtags are 50% because they are much less + # frequent than terms. if (!missing(removeTermsOrHashtags)) { removeTermsOrHashtags <- as.vector(removeTermsOrHashtags) #coerce to vector... to be sure - } - - if (missing(removeTermsOrHashtags)) { + } else { removeTermsOrHashtags <- "foobar" } - - + df <- x # match the variable names (this must be used to avoid warnings in package compilation) - + # if `df` is a list of dataframes, then need to convert these into one dataframe suppressWarnings( - if (class(df)=="list") { - df <- do.call("rbind", df) - } - ) + if (class(df) == "list") { + df <- do.call("rbind", df) + }) - EnsurePackage("igraph") - - # Now create the dfSemanticNetwork3, - # a dataframe of relations between hashtags and terms - # (i.e. 
hashtag i and term j both occurred in same tweet - # (weight = n occurrences)) - - print("Generating Twitter semantic network...") ### DEBUG - flush.console() - - # convert the hashtags to lowercase here (before using tm_map later) - # but first deal with character encoding: - macMatch <- grep("darwin",R.Version()$os) - if (length(macMatch)!=0) { - # df$hashtags_used <- iconv(df$hashtags_used,to="utf-8-mac") - df$hashtags_used <- lapply(df$hashtags_used, function(x) TrimOddCharMac(x)) - } - if (length(macMatch)==0) { - df$hashtags_used <- lapply(df$hashtags_used, function(x) TrimOddChar(x)) - } - # ... and then convert to lowercase: - df$hashtags_used <- lapply(df$hashtags_used,tolower) - - # do the same for the comment text, but first deal with character encoding! - # we need to change value of `to` argument in `iconv` depending on OS, or else errors can occur - macMatch <- grep("darwin",R.Version()$os) - if (length(macMatch)!=0) { - df$text <- iconv(df$text,to="utf-8-mac") - } - if (length(macMatch)==0) { - df$text <- iconv(df$text,to="utf-8") - } - # ... and then convert to lowercase: - df$text <- tolower(df$text) - - hashtagsUsedTemp <- c() # temp var to store output - - # The 'hashtags_used' column in the 'df' dataframe - # is slightly problematic (i.e. not straightforward) - # because each cell in this column contains a - # LIST, itself containing 1 or more char vectors - # (which are unique hashtags found in the tweet text; empty if no hashtags used). - # So, need to extract each list item out, - # and put it into its own row in a new dataframe: - - for (i in 1:nrow(df)) { - if (length(df$hashtags_used[[i]]) > 0) { # skip any rows where NO HASHTAGS were used - for (j in 1:length(df$hashtags_used[[i]])) { - #commonTermsTemp <- c(commonTermsTemp, df$from_user[i]) - hashtagsUsedTemp <- c(hashtagsUsedTemp,df$hashtags_used[[i]][j]) - } - } - } # NOTE: try and vectorise this in future work to improve speed. - - hashtagsUsedTemp <- unique(hashtagsUsedTemp) - -### delete hashtags that contain 'horizontal ellipses' - - # delEllipses <- grep("\u2026",hashtagsUsedTemp) - # cat(paste("\nNumber of hashtags with ellipses: ",length(delEllipses),"\n")) - # cat(paste("\nThe offending hashtags:\n",hashtagsUsedTemp[delEllipses],"\n")) - # cat("Original:\n") - # cat(hashtagsUsedTemp) - # hashtagsUsedTemp <- hashtagsUsedTemp[-delEllipses] - # cat("Fixed:\n") - # cat(hashtagsUsedTemp) - -######## - - hashtagsUsedTempFrequency <- c() - # potentially do not want EVERY hashtag - just the top N% (most common): - for (i in 1: length(hashtagsUsedTemp)) { - hashtagsUsedTempFrequency[i] <- length(grep(hashtagsUsedTemp[i],df$text)) - } - mTemp <- cbind(hashtagsUsedTemp, hashtagsUsedTempFrequency) - mTemp2 <- as.matrix(as.numeric(mTemp[,2])) - names(mTemp2) <- mTemp[,1] - vTemp <- sort(mTemp2, decreasing=TRUE) - hashtagsUsedTemp <- names(head(vTemp, (length(vTemp) / 100) * hashtagFreq)) - ################################ ^^^^ this defaults to top 50% hashtags - - # we need to remove all punctuation EXCEPT HASHES (!) - # (e.g. 
both #auspol and auspol will appear in data) - df$text <- gsub("[^[:alnum:][:space:]#]", "", df$text) - - ## Find the most frequent terms across the tweet text corpus - commonTermsTemp <- df$text - - corpusTweetText <- Corpus(VectorSource(commonTermsTemp)) - - ## add usernames to stopwords - - mach_usernames <- sapply(df$screen_name, function(x) TrimOddChar(x)) - mach_usernames <- unique(mach_usernames) - if (length(macMatch)!=0) { - mach_usernames <- iconv(mach_usernames,to="utf-8-mac") - } - if (length(macMatch)==0) { - mach_usernames <- iconv(mach_usernames,to="utf-8") - } - - # we remove the usernames from the text (so they don't appear in data/network) - my_stopwords <- mach_usernames - corpusTweetText <- tm_map(corpusTweetText, removeWords, my_stopwords) - - # convert to all lowercase (WE WILL DO THIS AGAIN BELOW, SO REMOVE THIS DUPLICATE) - # corpusTweetText <- tm_map(corpusTweetText, content_transformer(tolower)) - - # remove English stop words (IF THE USER HAS SPECIFIED!) - if (stopwordsEnglish) { - corpusTweetText <- tm_map(corpusTweetText, removeWords, stopwords("english")) - } - - # eliminate extra whitespace - corpusTweetText <- tm_map(corpusTweetText, stripWhitespace) - - # create document term matrix applying some transformations - # note: applying too many transformations here (duplicating...) - need to fix - tdm = TermDocumentMatrix(corpusTweetText, - control = list(removeNumbers = TRUE, tolower = TRUE)) - - # create a vector of the common terms, finding the top N% terms - # N will need to be adjusted according to network / user requirements. - - mTemp <- as.matrix(tdm) - vTemp <- sort(rowSums(mTemp), decreasing=TRUE) - commonTerms <- names(head(vTemp, (length(vTemp) / 100) * termFreq)) - ################################ ^^^^ the default finds top 5% terms - - toDel <- grep("http",commonTerms) # !! still picking up junk terms (FIX) - if(length(toDel) > 0) { - commonTerms <- commonTerms[-toDel] # delete these junk terms + # now create the dfSemanticNetwork3, a dataframe of relations between hashtags and terms (i.e. hashtag i and term j + # both occurred in same tweet (weight = n occurrences)) + + df_stats <- networkStats(NULL, "collected tweets", nrow(df)) + + cat("Generating twitter semantic network...\n") + flush.console() + + # convert the hashtags to lowercase here (before using tm_map later) but first deal with character encoding + macMatch <- grep("darwin", R.Version()$os) + if (length(macMatch) != 0) { + # df$hashtags_used <- iconv(df$hashtags_used, to = "utf-8-mac") + df$hashtags <- lapply(df$hashtags, function(x) TrimOddCharMac(x)) + } + + if (length(macMatch) == 0) { + df$hashtags <- lapply(df$hashtags, function(x) TrimOddChar(x)) + } + + # and then convert to lowercase + df$hashtags <- lapply(df$hashtags, tolower) + + # do the same for the comment text, but first deal with character encoding! + # we need to change value of `to` argument in 'iconv' depending on OS, or else errors can occur + macMatch <- grep("darwin", R.Version()$os) + if (length(macMatch) != 0) { + df$text <- iconv(df$text, to = "utf-8-mac") + } + + if (length(macMatch) == 0) { + df$text <- iconv(df$text, to = "utf-8") + } + + # and then convert to lowercase + df$text <- tolower(df$text) + + hashtagsUsedTemp <- c() # temp var to store output + + # the 'hashtags_used' column in the 'df' dataframe is slightly problematic (i.e. 
not straightforward) + # because each cell in this column contains a LIST, itself containing 1 or more char vectors (which are unique + # hashtags found in the tweet text; empty if no hashtags used). + # so, need to extract each list item out, and put it into its own row in a new dataframe + count <- 0 + for (i in 1:nrow(df)) { + if (length(df$hashtags[[i]]) > 0) { # skip any rows where NO HASHTAGS were used + for (j in 1:length(df$hashtags[[i]])) { + count <- count + 1 + #commonTermsTemp <- c(commonTermsTemp, df$from_user[i]) + hashtagsUsedTemp <- c(hashtagsUsedTemp, df$hashtags[[i]][j]) } - - # create the "semantic hashtag-term network" dataframe - # (i.e. pairs of hashtags / terms) - - termAssociatedWithHashtag <- c() # temp var to store output - hashtagAssociatedWithTerm <- c() # temp var to store output - - for (i in 1:nrow(df)) { - if (length(df$hashtags_used[[i]]) > 0) { # skip any rows where NO HASHTAGS were used - for (j in 1:length(df$hashtags_used[[i]])) { - for (k in 1:length(commonTerms)) { - match <- grep(commonTerms[k],df$text[i]) - if (length(match) > 0) { - - termAssociatedWithHashtag <- c(termAssociatedWithHashtag,commonTerms[k]) - hashtagAssociatedWithTerm <- c(hashtagAssociatedWithTerm,df$hashtags_used[[i]][j]) - - } - } + } + } # try and vectorise this in future work to improve speed + df_stats <- networkStats(df_stats, "raw hashtags", count, FALSE) + + hashtagsUsedTemp <- unique(hashtagsUsedTemp) + df_stats <- networkStats(df_stats, "unique hashtags", length(hashtagsUsedTemp), FALSE) + + hashtagsUsedTempFrequency <- c() + + # potentially do not want EVERY hashtag - just the top N% (most common) + for (i in 1: length(hashtagsUsedTemp)) { + hashtagsUsedTempFrequency[i] <- length(grep(hashtagsUsedTemp[i], df$text)) + } + + mTemp <- cbind(hashtagsUsedTemp, hashtagsUsedTempFrequency) + mTemp2 <- as.matrix(as.numeric(mTemp[, 2])) + names(mTemp2) <- mTemp[, 1] + vTemp <- sort(mTemp2, decreasing = TRUE) + + # this defaults to top 50% hashtags + hashtagsUsedTemp <- names(head(vTemp, (length(vTemp) / 100) * hashtagFreq)) + df_stats <- networkStats(df_stats, paste0("top ", hashtagFreq , "% hashtags"), length(hashtagsUsedTemp), FALSE) + + # we need to remove all punctuation EXCEPT HASHES (!) (e.g. both #auspol and auspol will appear in data) + df$text <- gsub("[^[:alnum:][:space:]#]", "", df$text) + + # find the most frequent terms across the tweet text corpus + commonTermsTemp <- df$text + + corpusTweetText <- Corpus(VectorSource(commonTermsTemp)) + + # add usernames to stopwords + mach_usernames <- sapply(df$screen_name, function(x) TrimOddChar(x)) + mach_usernames <- unique(mach_usernames) + + if (length(macMatch) != 0) { + mach_usernames <- iconv(mach_usernames, to = "utf-8-mac") + } + + if (length(macMatch) == 0) { + mach_usernames <- iconv(mach_usernames, to = "utf-8") + } + + # we remove the usernames from the text (so they don't appear in data/network) + my_stopwords <- mach_usernames + corpusTweetText <- tm_map(corpusTweetText, removeWords, my_stopwords) + + # convert to all lowercase (WE WILL DO THIS AGAIN BELOW, SO REMOVE THIS DUPLICATE) + # corpusTweetText <- tm_map(corpusTweetText, content_transformer(tolower)) + + # remove English stop words (IF THE USER HAS SPECIFIED!) 
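The corpus handling around this point (stopword removal, whitespace stripping, a term-document matrix, then the top termFreq percent of terms) can be seen end to end in a minimal editorial sketch. It assumes only the tm package; the input strings are invented.

library(tm)

txt  <- c("politics and the budget #auspol",
          "question time and the budget #auspol",
          "politics #ausvotes")
corp <- Corpus(VectorSource(txt))
corp <- tm_map(corp, removeWords, stopwords("english"))   # drops 'and', 'the', ...
corp <- tm_map(corp, stripWhitespace)
tdm  <- TermDocumentMatrix(corp, control = list(removeNumbers = TRUE, tolower = TRUE))

freqs    <- sort(rowSums(as.matrix(tdm)), decreasing = TRUE)
termFreq <- 50   # keep the top 50% of terms in this toy run (the package default is 5)
names(head(freqs, (length(freqs) / 100) * termFreq))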
+ if (stopwordsEnglish) { + corpusTweetText <- tm_map(corpusTweetText, removeWords, stopwords("english")) + } + + # eliminate extra whitespace + corpusTweetText <- tm_map(corpusTweetText, stripWhitespace) + + # create document term matrix applying some transformations + # ** applying too many transformations here (duplicating...) - need to fix + tdm = TermDocumentMatrix(corpusTweetText, control = list(removeNumbers = TRUE, tolower = TRUE)) + + # create a vector of the common terms, finding the top N% terms + # N will need to be adjusted according to network / user requirements + mTemp <- as.matrix(tdm) + vTemp <- sort(rowSums(mTemp), decreasing = TRUE) + df_stats <- networkStats(df_stats, paste0("common terms"), length(vTemp), FALSE) + + ## the default finds top 5% terms + commonTerms <- names(head(vTemp, (length(vTemp) / 100) * termFreq)) + + toDel <- grep("http", commonTerms) # !! still picking up junk terms (FIX) + if (length(toDel) > 0) { + commonTerms <- commonTerms[-toDel] # delete these junk terms + } + df_stats <- networkStats(df_stats, paste0("top ", termFreq , "% terms"), length(commonTerms), FALSE) + + # create the "semantic hashtag-term network" dataframe (i.e. pairs of hashtags / terms) + + termAssociatedWithHashtag <- c() # temp var to store output + hashtagAssociatedWithTerm <- c() # temp var to store output + + for (i in 1:nrow(df)) { + if (length(df$hashtags[[i]]) > 0) { # skip any rows where NO HASHTAGS were used + for (j in 1:length(df$hashtags[[i]])) { + for (k in 1:length(commonTerms)) { + + match <- grep(commonTerms[k], df$text[i]) + + if (length(match) > 0) { + termAssociatedWithHashtag <- c(termAssociatedWithHashtag, commonTerms[k]) + hashtagAssociatedWithTerm <- c(hashtagAssociatedWithTerm, df$hashtags[[i]][j]) } } - } # THIS IS A *HORRIBLE* LOOPED APPROACH. NEED TO VECTORISE!!! - - # this needs to be changed to termAssociatedWithHashtag and hashtagAssociatedWithTerm - dfSemanticNetwork3 <- data.frame(hashtagAssociatedWithTerm, termAssociatedWithHashtag) - - # OK, now extract only the UNIQUE pairs (i.e. rows) - # But, also create a WEIGHT value for usages of the same hashtag. - # NOTE: This edge weights approach might be problematic for TEMPORAL networks, because each edge (with weight > 1) may represent usage of hashtags at DIFFERENT TIMES. - # NOTE: A possible workaround could be to include an edge attribute that is a set of timestamp elements, showing the date/time of each instance of usage of a hashtag. - # NOTE: For example, in a temporal visualisation, the first timestamp might 'pop in' the edge to the graph, which then might start to 'fade out' over time (or just 'pop out' of graph after N seconds) if there are no more timestamps indicating activity (i.e. a user using a hashtag). - # NOTE: So, a 'timestamps' edge attribute could factor into a kind of 'entropy' based approach to evolving the network visually over time. - - # unique pairs: - unique_dfSemanticNetwork3 <- unique(dfSemanticNetwork3) # hmm, need this still? - - # number of times hashtag was used per user/hashtag pair (i.e. 
edge weight): - for (i in 1:nrow(unique_dfSemanticNetwork3)) { - unique_dfSemanticNetwork3$numHashtagTermOccurrences[i] <- sum( - hashtagAssociatedWithTerm==unique_dfSemanticNetwork3[i,1] & - termAssociatedWithHashtag==unique_dfSemanticNetwork3[i,2]) - } - - # make a dataframe of the relations between actors - relations <- data.frame(from=as.character(unique_dfSemanticNetwork3[,1]),to=as.character(unique_dfSemanticNetwork3[,2]),weight=unique_dfSemanticNetwork3$numHashtagTermOccurrences) - relations$from <- as.factor(relations$from) - relations$to <- as.factor(relations$to) - - actorsFixed <- rbind(as.character(unique_dfSemanticNetwork3[,1]),as.character(unique_dfSemanticNetwork3[,2])) - actorsFixed <- as.factor(actorsFixed) - actorsFixed <- unique(actorsFixed) - - ##### STEP FOUR ##### - - # convert into a graph - suppressWarnings( - g <- graph.data.frame(relations, directed=FALSE, vertices=actorsFixed) - ) - # we need to simplify the graph because multiple use of same term - # in one tweet will cause self-loops, etc - # g <- simplify(g) - - # Make the node labels play nice with Gephi - V(g)$label <- V(g)$name - - # remove the search term / hashtags, if user specified it: - if (removeTermsOrHashtags[1]!="foobar") { - toDel <- match(tolower(removeTermsOrHashtags),V(g)$name) # we force to lowercase because all terms/hashtags are already converted to lowercase - toDel <- toDel[!is.na(toDel)] # in case of user error (i.e. trying to delete terms/hashtags that don't exist in the data) - g <- delete.vertices(g, toDel) - } - - if (writeToFile=="TRUE" | writeToFile=="true" | writeToFile=="T" | writeToFile==TRUE) { - # Output the final network to a graphml file, to import directly into Gephi - currTime <- format(Sys.time(), "%b_%d_%X_%Y_%Z") - currTime <- gsub(":","_",currTime) - write.graph(g,paste0(currTime,"_TwitterSemanticNetwork.graphml"),format="graphml") - cat("Twitter semantic network was written to current working directory, with filename:\n") - cat(paste0(currTime,"_TwitterSemanticNetwork.graphml")) } - - cat("\nDone.") ### DEBUG - flush.console() - - return(g) - + } + } # THIS IS A *HORRIBLE* LOOPED APPROACH. NEED TO VECTORISE!!! + + # this needs to be changed to termAssociatedWithHashtag and hashtagAssociatedWithTerm + dfSemanticNetwork3 <- data.frame(hashtagAssociatedWithTerm, termAssociatedWithHashtag) + + # OK, now extract only the UNIQUE pairs (i.e. rows) + # But, also create a WEIGHT value for usages of the same hashtag. + # NOTE: This edge weights approach might be problematic for TEMPORAL networks, because each edge (with weight > 1) + # may represent usage of hashtags at DIFFERENT TIMES. + # NOTE: A possible workaround could be to include an edge attribute that is a set of timestamp elements, showing the + # date/time of each instance of usage of a hashtag. + # NOTE: For example, in a temporal visualisation, the first timestamp might 'pop in' the edge to the graph, which + # then might start to 'fade out' over time (or just 'pop out' of graph after N seconds) if there are no more + # timestamps indicating activity (i.e. a user using a hashtag). + # NOTE: So, a 'timestamps' edge attribute could factor into a kind of 'entropy' based approach to evolving the + # network visually over time. + + # unique pairs + unique_dfSemanticNetwork3 <- unique(dfSemanticNetwork3) # hmm, need this still? + + # number of times hashtag was used per user/hashtag pair (i.e. 
edge weight): + for (i in 1:nrow(unique_dfSemanticNetwork3)) { + unique_dfSemanticNetwork3$numHashtagTermOccurrences[i] <- sum( + hashtagAssociatedWithTerm == unique_dfSemanticNetwork3[i, 1] & + termAssociatedWithHashtag == unique_dfSemanticNetwork3[i, 2]) + } + + # make a dataframe of the relations between actors + relations <- data.frame(from = as.character(unique_dfSemanticNetwork3[, 1]), + to = as.character(unique_dfSemanticNetwork3[,2]), + weight = unique_dfSemanticNetwork3$numHashtagTermOccurrences) + + relations$from <- as.factor(relations$from) + relations$to <- as.factor(relations$to) + + actorsFixed <- rbind(as.character(unique_dfSemanticNetwork3[, 1]), as.character(unique_dfSemanticNetwork3[, 2])) + actorsFixed <- as.factor(actorsFixed) + actorsFixed <- unique(actorsFixed) + df_stats <- networkStats(df_stats, "unique entities (nodes)", length(actorsFixed)) + df_stats <- networkStats(df_stats, "relations (edges)", nrow(relations)) + + # convert into a graph + suppressWarnings(g <- graph.data.frame(relations, directed = FALSE, vertices = actorsFixed)) + + # we need to simplify the graph because multiple use of same term in one tweet will cause self-loops, etc + # g <- simplify(g) + + # make the node labels play nice with Gephi + V(g)$label <- V(g)$name + + # remove the search term / hashtags, if user specified it + if (removeTermsOrHashtags[1] != "foobar") { + # we force to lowercase because all terms/hashtags are already converted to lowercase + toDel <- match(tolower(removeTermsOrHashtags), V(g)$name) + + # in case of user error (i.e. trying to delete terms/hashtags that don't exist in the data) + toDel <- toDel[!is.na(toDel)] + + g <- delete.vertices(g, toDel) + + df_stats <- networkStats(df_stats, "entities after terms/hashtags removed", vcount(g)) + } + + # print stats + if (verbose) { networkStats(df_stats, print = TRUE) } + + if (writeToFile) { writeOutputFile(g, "graphml", "TwitterSemanticNetwork") } + + cat("Done.\n") + flush.console() + + return(g) } diff --git a/vosonSML/R/EnsurePackage.R b/vosonSML/R/EnsurePackage.R deleted file mode 100644 index 8e24ffe..0000000 --- a/vosonSML/R/EnsurePackage.R +++ /dev/null @@ -1,12 +0,0 @@ -EnsurePackage <- -function(x) { - # EnsurePackage(x) - Installs and loads a package if necessary - # Args: - # x: name of package - - x <- as.character(x) - if (!require(x, character.only=TRUE)) { - install.packages(pkgs=x, repos="http://cran.r-project.org") - require(x, character.only=TRUE) - } -} diff --git a/vosonSML/R/ExtractHashtagInfo.R b/vosonSML/R/ExtractHashtagInfo.R deleted file mode 100644 index 784bdda..0000000 --- a/vosonSML/R/ExtractHashtagInfo.R +++ /dev/null @@ -1,27 +0,0 @@ -ExtractHashtagInfo <- -function(df) { - # For each tweet, extract ANY hashtags that a user has used: - - df$hashtags_used <- sapply(df$text, function(tweet) - - # OLD WAY: - # TrimHead(str_match_all(tweet,"#[[:alnum:]_+]*")[[1]]) - - # NEW WAY: - # This matches hashtags, but not if the hashtag is "cut off" at the end - # of the tweet text, denoted by a 'trailing ellipsis' character. - # This avoids the problem of picking up erroneous hashtags that are cut off, - # e.g. "#ausp..." 
when it should be "#auspol" - - # horizontalEllipsis <- "\u2026" - # horizontalEllipsisFixed <- stri_unescape_unicode(horizontalEllipsis) - - # patternRegex <- paste0("#[^#\\s]+(?!\\\u2026)\\b") - # TrimHead(str_match_all(tweet,paste0("#[[:alnum:]_+^",horizontalEllipsis,"$]*"))[[1]]) - - regmatches(tweet, gregexpr("#[^#\\s]+(?!\u2026)\\b", tweet, perl=T)) - - ) - - return(df) -} diff --git a/vosonSML/R/ExtractUrls.R b/vosonSML/R/ExtractUrls.R deleted file mode 100644 index 73bd0dc..0000000 --- a/vosonSML/R/ExtractUrls.R +++ /dev/null @@ -1,16 +0,0 @@ -ExtractUrls <- -function(df) { - # For each tweet, extract url, remove it from the tweet, - # and put them separately in a new column - # TODO: cannot deal with multiple urls in one tweet right now - - # EnsurePackage("stringr") - # EnsurePackage("grid") - - # extracts links (quick and dirty) - # wish to have something like http://daringfireball.net/2009/11/liberal_regex_for_matching_urls - df$links <- sapply(df$text,function(tweet) str_extract(tweet,("http[^[:blank:]]+"))) - df$text <- sapply(df$text, function(x) TrimUrls(x)) - - return(df) -} diff --git a/vosonSML/R/ExtractUserInfo.R b/vosonSML/R/ExtractUserInfo.R deleted file mode 100644 index fcb919c..0000000 --- a/vosonSML/R/ExtractUserInfo.R +++ /dev/null @@ -1,19 +0,0 @@ -ExtractUserInfo <- -function(df) { - # For each tweet, extract information related to users - # such as to_user, rt_user... - - # extract to_user - df$reply_to <- sapply(df$text, function(tweet) - TrimHead(str_extract(tweet,"^((\\.)?(@[[:alnum:]_+]*))"))) - - # extract any MENTIONS at all (inc. replies, mentions, etc) ### This is a completely new approach - it 'vacuums' up ANY mentions - df$users_mentioned <- sapply(df$text, function(tweet) - TrimHead(str_match_all(tweet,"@[[:alnum:]_+]*")[[1]])) - - # extract rt_user - df$retweet_from <- sapply(df$text, function(tweet) - TrimHead(str_extract(tweet,"^[RM]T (@[[:alnum:]_+]*)"))) - - return(df) -} diff --git a/vosonSML/R/GetYoutubeVideoIDs.R b/vosonSML/R/GetYoutubeVideoIDs.R index ab366e6..0bc0537 100644 --- a/vosonSML/R/GetYoutubeVideoIDs.R +++ b/vosonSML/R/GetYoutubeVideoIDs.R @@ -1,60 +1,51 @@ -#' Extract/scrape the IDs from a set of YouTube video URLs -#' -#' This function reads a list of YouTube video URLs from a text file and -#' converts them to a vector object. For example, -#' "https://www.youtube.com/watch?v=73I5dRucCds" has the ID "73I5dRucCds". This -#' function can be used to create an object for the argument \code{videoIDs} in -#' the function \code{CollectDataYoutube}, that is, by extracting the IDs for a -#' set of YouTube videos and compiling them into a vector, ready for collecting -#' data with \code{CollectDataYoutube}. -#' -#' -#' @param file The connection to read from. This can be a local file, or a http -#' or ftp connection. It can also be a character string with the file name or -#' URI. The file must be plain text format with the URL of each YouTube video -#' specified on a new line (separated by character return). For example, the -#' first line might contain https://www.youtube.com/watch?v=73I5dRucCds, and -#' the second line might contain https://www.youtube.com/watch?v=6S9r_YbqHy8. -#' @return a character vector representing a set of YouTube video IDs, each -#' with number of characters equal to 11 (e.g. "73I5dRucCds"). -#' @note This function is useful for lots of videos. However, many videos may -#' take a *long* time to collect data from. 
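For reference, the extraction this helper performs reduces to taking characters 33 to 43 of a standard watch URL. A quick editorial illustration using the URL already quoted in the documentation:

url <- "https://www.youtube.com/watch?v=73I5dRucCds"
substr(url, 33, 43)   # "73I5dRucCds" - the 11-character video ID, matching the substr() call in the function body below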
In such cases it is recommended to -#' use the \code{verbose=TRUE} argument for the function -#' \code{CollectDataYoutube}, in order to keep track of progress during -#' computation. -#' @author Timothy Graham & Robert Ackland -#' -#' @seealso Use \code{CollectDataYoutube} for collecting YouTube comments data. +#' Extract the IDs from a set of YouTube video URLs +#' +#' This function reads a list of YouTube video URLs from a text file and converts them to a vector object. For example, +#' "https://www.youtube.com/watch?v=73I5dRucCds" has the ID "73I5dRucCds". This function can be used to create an +#' object for the argument \code{videoIDs} in the function \code{CollectDataYoutube}, that is, by extracting the IDs +#' for a set of YouTube videos and compiling them into a vector, ready for collecting data with +#' \code{CollectDataYoutube}. +#' +#' @param file The connection to read from. This can be a local file, or a http or ftp connection. It can also be a +#' character string with the file name or URI. The file must be plain text format with the URL of each YouTube video +#' specified on a new line (separated by character return). For example, the first line might contain +#' https://www.youtube.com/watch?v=73I5dRucCds, and the second line might contain +#' https://www.youtube.com/watch?v=6S9r_YbqHy8. +#' +#' @return a character vector representing a set of YouTube video IDs, each with number of characters equal to 11 +#' (e.g. "73I5dRucCds"). +#' +#' @note This function is useful for lots of videos. However, many videos may take a *long* time to collect data from. +#' In such cases it is recommended to use the \code{verbose = TRUE} argument for the function \code{CollectDataYoutube} +#' , in order to keep track of progress during computation. +#' +#' @seealso \code{CollectDataYoutube} #' @keywords youtube scraping vosonSML -#' @examples #' +#' @examples #' \dontrun{ -#' ## This example shows how to use `GetYoutubeVideoIDs` to extract video IDs from YouTube -#' ## video URLs, and then collect data using the function `CollectDataYoutube` +#' # this example shows how to use 'GetYoutubeVideoIDs' to extract video IDs from YouTube video +#' # URLs, and then collect data using the function 'CollectDataYoutube' #' -#' # Use your own Google Developer API Key here: -#' myApiKey <- "1234567890" +#' # set your Google Developer API key +#' myYtApiKey <- "xxxxxxxxxx" #' -#' # Authenticate with the Google API -#' apiKeyYoutube <- AuthenticateWithYoutubeAPI(apiKeyYoutube=myApiKey) +#' # authenticate with the Google API +#' apiKeyYoutube <- AuthenticateWithYoutubeAPI(apiKeyYoutube = myYtApiKey) #' -#' # Use the function `GetYoutubeVideoIDs` to automatically generate vector of IDs from -#' # a plain text file of video URLs -#' videoIDs <- GetYoutubeVideoIDs(file="youtube_to_scrape.txt") +#' # use the function 'GetYoutubeVideoIDs' to automatically generate vector of IDs from a plain +#' # text file of video URLs +#' videoIDs <- GetYoutubeVideoIDs(file = "youtube_urls_to_scrape.txt") #' -#' # Collect the data using function `CollectDataYoutube` -#' myYoutubeData <- CollectDataYoutube(videoIDs,apiKeyYoutube,writeToFile=FALSE) +#' # collect the data using function 'CollectDataYoutube' +#' myYoutubeData <- CollectDataYoutube(videoIDs, apiKeyYoutube, writeToFile = FALSE) #' } +#' #' @export -GetYoutubeVideoIDs <- -function(file){ - - videoIDsTemp <- read.table(file, - sep="\n", - strip.white=TRUE) # in case of user input error - +GetYoutubeVideoIDs <- function(file) { + + videoIDsTemp <- read.table(file, sep = 
"\n", strip.white = TRUE) # in case of user input error videoIDsTemp <- as.vector(videoIDsTemp$V1) - - videoIDsOut <- substr(videoIDsTemp,33,43) - + + videoIDsOut <- substr(videoIDsTemp, 33, 43) } diff --git a/vosonSML/R/GraphUserInfoTwitter.R b/vosonSML/R/GraphUserInfoTwitter.R new file mode 100644 index 0000000..2f5607e --- /dev/null +++ b/vosonSML/R/GraphUserInfoTwitter.R @@ -0,0 +1,91 @@ +#' Create twitter network graph with user information attributes +#' +#' Creates a network from the relations and users dataframes generated by Create. Network is supplemented with +#' additional downloaded user information applied as node attributes. +#' +#' @param df_collect A dataframe containing the collected tweet data from \code{Collect}. +#' @param df_relations A dataframe containing the network relations data from \code{Create}. +#' @param df_users A dataframe containing the network users data from \code{Create}. +#' @param lookup_missing_users Logical. Request user information for any users missing from df_collect. Default +#' is \code{TRUE}. +#' @param twitter_token An twitter authentication token from \code{Authenticate}. +#' @param writeToFile Logical. If \code{TRUE} a data frame of user information and the resulting network graph will +#' be saved to file. Default is \code{FALSE}. +#' +#' @note Only supports twitter actor network at this time. Bimodal network support will require the filtering +#' of twitter user ids from nodes of other types. +#' +#' @return A list containing a dataframe with user information and an igraph object of the twitter network with +#' user node attributes. +#' +#' @export +GraphUserInfoTwitter <- function(df_collect, df_relations, df_users, lookup_missing_users = TRUE, + twitter_token = NULL, writeToFile = FALSE) { + + cat("Creating twitter network graph with user information as node attributes...\n") + flush.console() + + df_users %<>% dplyr::mutate_all(as.character) # changes all col types to character + + df_users_info <- rtweet::users_data(df_collect) %>% dplyr::distinct(.data$user_id, .keep_all = TRUE) + df_users_info %<>% dplyr::mutate_all(as.character) # changes all col types to character + df_missing_users <- dplyr::anti_join(df_users, df_users_info, by = "user_id") %>% + dplyr::distinct(.data$user_id, .keep_all = TRUE) + + df_missing_users_info <- NULL + if (lookup_missing_users) { + if (is.null(twitter_token)) { + cat("Please supply rtweet twitter authentication token to look up missing users info.\n") + } else { + cat(paste0("Fetching user information for ", nrow(df_missing_users), " users.\n")) + + # 90000 users per 15 mins with unused rate limit + df_lookup_data <- rtweet::lookup_users(df_missing_users$user_id, parse = TRUE, + token = twitter_token$auth) + df_missing_users_info <- rtweet::users_data(df_lookup_data) + cat(paste0("User information collected for ", nrow(df_missing_users_info), " users.\n")) + + if (nrow(df_missing_users) != nrow(df_missing_users_info)) { + cat("Collected user records does not match the number requested. 
Adding incomplete records back in.\n") + df_not_collected <- dplyr::anti_join(df_missing_users, df_missing_users_info, by = "user_id") + df_missing_users_info <- dplyr::bind_rows(df_missing_users_info, df_not_collected) + } + } + } else { + cat("No additional users information fetched.\n") + } + + if (!is.null(df_missing_users_info)) { + df_users_info_all <- rbind(df_users_info, df_missing_users_info) + } else { + df_users_info_all <- dplyr::bind_rows(df_users_info, df_missing_users) + } + + df_users_info_all %<>% dplyr::rename("display_name" = .data$name, "name" = .data$user_id) + + # fix numeric cols type and replacing na's for convenience + # col names ending in "count" + df_users_info_all %<>% dplyr::mutate_at(vars(ends_with("count")), funs(ifelse(is.na(.data$.), as.integer(0), + as.integer(.data$.)))) + + if (!is.null(df_missing_users_info) & writeToFile) { + writeOutputFile(df_users_info_all, "rds", "TwitterUserInfo") + } + + g <- graph_from_data_frame(df_relations, directed = TRUE, vertices = df_users_info_all) + + V(g)$screen_name <- ifelse(is.na(V(g)$screen_name), paste0("ID:", V(g)$name), V(g)$screen_name) + V(g)$label <- V(g)$screen_name + + if (writeToFile) { writeOutputFile(g, "graphml", "TwitterUserNetwork") } + + cat("Done.\n") + flush.console() + + function_output <- list( + "users" = df_users_info_all, + "graph" = g + ) + + return(function_output) +} diff --git a/vosonSML/R/importData.R b/vosonSML/R/ImportData.R similarity index 96% rename from vosonSML/R/importData.R rename to vosonSML/R/ImportData.R index b262d5f..3ce8400 100644 --- a/vosonSML/R/importData.R +++ b/vosonSML/R/ImportData.R @@ -28,13 +28,13 @@ #' rangeTo="2015-03-02", writeToFile=TRUE) #' #' # Import the data (that was saved to disk in the previous step) -#' myStarWarsData <- importData("2015-03-01_to_2015-03-02_StarWars_FacebookData.csv","facebook") +#' myStarWarsData <- ImportData("2015-03-01_to_2015-03-02_StarWars_FacebookData.csv","facebook") #' #' # Create a network using the imported dataframe object #' myNetwork <- myStarWarsData %>% Create("Bimodal") #' } #' @export -importData <- function(file,dataSource) { +ImportData <- function(file, dataSource) { df <- read.csv(file) #if(all(colnames(df)==c("X","from","to","edgeType","postType","postLink","postTimestamp","commentText","commentTimestamp"))) { # class(df) <- c("data.table","data.frame","dataSource","facebook") diff --git a/vosonSML/R/PopulateUserInfo.R b/vosonSML/R/PopulateUserInfo.R deleted file mode 100644 index f8e0d6a..0000000 --- a/vosonSML/R/PopulateUserInfo.R +++ /dev/null @@ -1,187 +0,0 @@ -#' Populate Twitter networks with user information -#' -#' This function is used to 'populate' Twitter networks (generated -#' with the \code{\link{Create}} function) with information about -#' the users in the network. This involves calls to the Twitter API -#' to collect this information, which is then applied to the network -#' as vertex attributes. 
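The role described here appears to be taken over by the new GraphUserInfoTwitter() added earlier in this diff. A hedged usage sketch of that replacement follows: 'tweets', 'actorNetwork' and 'twitterAuth' are hypothetical objects standing in for the outputs of Collect, Create and Authenticate, and the assumption that the actor network object exposes relations and users data frames is the editor's, not something stated in the diff.

result <- GraphUserInfoTwitter(df_collect   = tweets,
                               df_relations = actorNetwork$relations,
                               df_users     = actorNetwork$users,
                               lookup_missing_users = TRUE,
                               twitter_token = twitterAuth,
                               writeToFile  = FALSE)

g_actor    <- result$graph   # igraph network with user information as node attributes
users_info <- result$users   # data frame of the user information applied to the graph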
-#' -#' @param networkObject an igraph graph object created with \code{\link{Create}} -#' @return An igraph graph object -#' @author Timothy Graham & Robert Ackland -#' -#' @seealso \code{\link{Collect}}, \code{\link{Create}} -#' @examples -#' -#' \dontrun{ -#' require(magrittr) -#' ## Get Twitter user information and apply to network -#' myTwitterNetwork_userInfo <- PopulateUserInfo(myTwitterNetwork) -#' -#' } -#' @export -PopulateUserInfo <- function(networkObject) { - name=NULL # appease the gods of R CMD CHECK - # This function is supposed to perform the lookups in batches - # and mind the rate limit: - getUserObjects <- function(users) { - groups <- split(users, ceiling(seq_along(users)/50)) - userObjects <- ldply(groups, function(group) { # ldply is a very cool function, found in plyr package. - objects <- lookupUsers(group, includeNA=TRUE) - out <- twListToDF(objects) # twListToDF is also a handy function, found in twitteR package. Converts weird class object to data frame. - # print("Waiting for 15 minutes (to 'refresh' the rate limit)...") # Don't need to use this yet. Implement later for number of users > 7500 (have to do chunked batches... chunks of chunks... urrghh) - # Sys.sleep(900) - return(out) - }) - return(userObjects) - } - - # get the list of users - listOfUsers <- unique(V(networkObject)$name) - listOfUsers_actual <- listOfUsers[-grep("^#",listOfUsers)] - - ## Test if it is a Twitter actor network - if (length(listOfUsers_actual)<1) { - - # predefine a data table to store the results (later delete unneeded rows) - actors <- data.table( - name=as.character(c(rep("NA_f00",length(listOfUsers)))), - userDescription=as.character(c(rep("NA_f00",length(listOfUsers)))), - statusesCount=as.character(c(rep("NA_f00",length(listOfUsers)))), - followersCount=as.character(c(rep("NA_f00",length(listOfUsers)))), - favoritesCount=as.character(c(rep("NA_f00",length(listOfUsers)))), - friendsCount=as.character(c(rep("NA_f00",length(listOfUsers)))), - url=as.character(c(rep("NA_f00",length(listOfUsers)))), - realName=as.character(c(rep("NA_f00",length(listOfUsers)))), - dateAccountCreated=as.character(c(rep("NA_f00",length(listOfUsers)))), - userLocation=as.character(c(rep("NA_f00",length(listOfUsers)))), - userLanguage=as.character(c(rep("NA_f00",length(listOfUsers)))), - numberOfListsUserIsFeaturedOn=as.character(c(rep("NA_f00",length(listOfUsers)))), - profileImageUrl=as.character(c(rep("NA_f00",length(listOfUsers)))) - ) - - setkey(actors,name) # set the key value of the data table - - nextEmptyRow <- 1 # so we can update rows in `dataCombined` in a relatively efficient way - - # This function is supposed to perform the lookups in batches - # and mind the rate limit: - getUserObjects <- function(users) { - groups <- split(users, ceiling(seq_along(users)/50)) - userObjects <- ldply(groups, function(group) { # ldply is a very cool function, found in plyr package. - objects <- lookupUsers(group, includeNA=TRUE) - out <- twListToDF(objects) # twListToDF is also a handy function, found in twitteR package. Converts weird class object to data frame. - # print("Waiting for 15 minutes (to 'refresh' the rate limit)...") # Don't need to use this yet. Implement later for number of users > 7500 (have to do chunked batches... chunks of chunks... 
urrghh) - # Sys.sleep(900) - return(out) - }) - return(userObjects) - } - - # Collect user data (will return NA for users who don't exist) - - # query the user data - cat("\n Fetching the user data...\n") # DEBUG - usersInformationAttributes <- getUserObjects(listOfUsers) - actorsInfoDF <- usersInformationAttributes - - actors$name <- actorsInfoDF$screenName - actors$userDescription <- actorsInfoDF$description - actors$statusesCount <- actorsInfoDF$statusesCount - actors$followersCount <- actorsInfoDF$followersCount - actors$favoritesCount <- actorsInfoDF$favoritesCount - actors$friendsCount <- actorsInfoDF$friendsCount - actors$url <- actorsInfoDF$url - actors$realName <- actorsInfoDF$name - actors$dateAccountCreated <- actorsInfoDF$created - actors$userLocation <- actorsInfoDF$location - actors$userLanguage <- actorsInfoDF$lang - actors$numberOfListsUserIsFeaturedOn <- actorsInfoDF$listedCount - actors$profileImageUrl <- actorsInfoDF$profileImageUrl - - # the final thing to do is apply the values in `actors` to the networkObject - - V(networkObject)$screenName <- actors$name - V(networkObject)$userDescription <- actors$userDescription - V(networkObject)$statusesCount <- actors$statusesCount - V(networkObject)$followersCount <- actors$followersCount - V(networkObject)$favoritesCount <- actors$favoritesCount - V(networkObject)$friendsCount <- actors$friendsCount - V(networkObject)$url <- actors$url - V(networkObject)$realName <- actors$realName - V(networkObject)$dateAccountCreated <- actors$dateAccountCreated - V(networkObject)$userLocation <- actors$userLocation - V(networkObject)$userLanguage <- actors$userLanguage - V(networkObject)$numberOfListsUserIsFeaturedOn <- actors$numberOfListsUserIsFeaturedOn - V(networkObject)$profileImageUrl <- actors$profileImageUrl - - return(networkObject) - - } - - ## Test if it is a Twitter bimodal network - if (length(listOfUsers_actual)>=1) { - - # predefine a data table to store the results (later delete unneeded rows) - actors <- data.table( - name=as.character(c(rep(NA,length(listOfUsers)))), - userDescription=as.character(c(rep(NA,length(listOfUsers)))), - statusesCount=as.character(c(rep(NA,length(listOfUsers)))), - followersCount=as.character(c(rep(NA,length(listOfUsers)))), - favoritesCount=as.character(c(rep(NA,length(listOfUsers)))), - friendsCount=as.character(c(rep(NA,length(listOfUsers)))), - url=as.character(c(rep(NA,length(listOfUsers)))), - realName=as.character(c(rep(NA,length(listOfUsers)))), - dateAccountCreated=as.character(c(rep(NA,length(listOfUsers)))), - userLocation=as.character(c(rep(NA,length(listOfUsers)))), - userLanguage=as.character(c(rep(NA,length(listOfUsers)))), - numberOfListsUserIsFeaturedOn=as.character(c(rep(NA,length(listOfUsers)))), - profileImageUrl=as.character(c(rep(NA,length(listOfUsers)))) - ) - - setkey(actors,name) # set the key value of the data table - - # query the user data - cat("\n Fetching the user data...\n") # DEBUG - usersInformationAttributes <- getUserObjects(listOfUsers_actual) # exclude hashtag vertices - actorsInfoDF <- usersInformationAttributes - actorsInfoDF_hashtag_NA <- - - # but this does not give us data for 'hashtag' type vertices (none exists of course) - # so, need to fill in this information manually - - actors$name[1:length(listOfUsers_actual)] <- actorsInfoDF$screenName - actors$userDescription[1:length(listOfUsers_actual)] <- actorsInfoDF$description - actors$statusesCount[1:length(listOfUsers_actual)] <- actorsInfoDF$statusesCount - 
actors$followersCount[1:length(listOfUsers_actual)] <- actorsInfoDF$followersCount - actors$favoritesCount[1:length(listOfUsers_actual)] <- actorsInfoDF$favoritesCount - actors$friendsCount[1:length(listOfUsers_actual)] <- actorsInfoDF$friendsCount - actors$url[1:length(listOfUsers_actual)] <- actorsInfoDF$url - actors$realName[1:length(listOfUsers_actual)] <- actorsInfoDF$name - actors$dateAccountCreated[1:length(listOfUsers_actual)] <- actorsInfoDF$created - actors$userLocation[1:length(listOfUsers_actual)] <- actorsInfoDF$location - actors$userLanguage[1:length(listOfUsers_actual)] <- actorsInfoDF$lang - actors$numberOfListsUserIsFeaturedOn[1:length(listOfUsers_actual)] <- actorsInfoDF$listedCount - actors$profileImageUrl[1:length(listOfUsers_actual)] <- actorsInfoDF$profileImageUrl - - # the final thing to do is apply the values in `actors` to the networkObject - - V(networkObject)$screenName <- actors$name - V(networkObject)$userDescription <- actors$userDescription - V(networkObject)$statusesCount <- actors$statusesCount - V(networkObject)$followersCount <- actors$followersCount - V(networkObject)$favoritesCount <- actors$favoritesCount - V(networkObject)$friendsCount <- actors$friendsCount - V(networkObject)$url <- actors$url - V(networkObject)$realName <- actors$realName - V(networkObject)$dateAccountCreated <- actors$dateAccountCreated - V(networkObject)$userLocation <- actors$userLocation - V(networkObject)$userLanguage <- actors$userLanguage - V(networkObject)$numberOfListsUserIsFeaturedOn <- actors$numberOfListsUserIsFeaturedOn - V(networkObject)$profileImageUrl <- actors$profileImageUrl - - return(networkObject) - - } - -} diff --git a/vosonSML/R/PreprocessTweets.R b/vosonSML/R/PreprocessTweets.R deleted file mode 100644 index 356d542..0000000 --- a/vosonSML/R/PreprocessTweets.R +++ /dev/null @@ -1,13 +0,0 @@ -PreprocessTweets <- -function(df) { - # Perform a few preprocessing tasks - - # removing odd characters - df.new <- RemoveOddChars(df) - # extract user info and add to df - df.new <- ExtractUserInfo(df.new) - # extract urls and add to df - df.new <- ExtractUrls(df.new) - - return(df.new) -} diff --git a/vosonSML/R/RemoveOddChars.R b/vosonSML/R/RemoveOddChars.R deleted file mode 100644 index 56aa21b..0000000 --- a/vosonSML/R/RemoveOddChars.R +++ /dev/null @@ -1,6 +0,0 @@ -RemoveOddChars <- -function(df) { - # Remove odd characters in tweets - df$text <- sapply(df$text, function(x) TrimOddChar(x)) - return(df) -} diff --git a/vosonSML/R/RemoveOddCharsUserInfo.R b/vosonSML/R/RemoveOddCharsUserInfo.R deleted file mode 100644 index cb70c27..0000000 --- a/vosonSML/R/RemoveOddCharsUserInfo.R +++ /dev/null @@ -1,13 +0,0 @@ -RemoveOddCharsUserInfo <- -function(actorsInfoDF) { - # Remove odd characters in the user information attributes - # Odd characters is especially problematic for search queries that trawl non-English speaking users/collectives. 
- actorsInfoDF$screenName <- sapply(actorsInfoDF$screenName, function(x) TrimOddChar(x)) - actorsInfoDF$description <- sapply(actorsInfoDF$description, function(x) TrimOddChar(x)) - actorsInfoDF$url <- sapply(actorsInfoDF$url, function(x) TrimOddChar(x)) - actorsInfoDF$name <- sapply(actorsInfoDF$name, function(x) TrimOddChar(x)) - actorsInfoDF$location <- sapply(actorsInfoDF$location, function(x) TrimOddChar(x)) - actorsInfoDF$lang <- sapply(actorsInfoDF$lang, function(x) TrimOddChar(x)) - actorsInfoDF$profileImageUrl <- sapply(actorsInfoDF$profileImageUrl, function(x) TrimOddChar(x)) - return(actorsInfoDF) -} diff --git a/vosonSML/R/SaveCredential.R b/vosonSML/R/SaveCredential.R new file mode 100644 index 0000000..7e836db --- /dev/null +++ b/vosonSML/R/SaveCredential.R @@ -0,0 +1,51 @@ +#' Save and load credential information +#' +#' Functions to save and load credential information. Currently, credential information will be stored as a RDS file. +#' \code{SaveCredential} will return the input \code{credential}, useful for working as a filter between +#' \code{Authenticate} and \code{Collect}. +#' +#' @aliases SaveCredential LoadCredential +#' +#' @param credential A \code{credential} object. +#' @param filename Character string. Filename to be saved to or restored from. Default value is \code{credential.RDS}. +#' +#' @return A \code{credential} object. +#' +#' @examples +#' \dontrun{ +#' require(magrittr) +#' +#' ## save credential example +#' +#' myIgAppID <- "xxxxxxxxxxx" +#' myIgAppSecret <- "xxxxxxxxxxxxxxxxxxxxxx" +#' listIgUsernames <- c("senjohnmccain", "obama") +#' +#' Authenticate("instagram", appID = myIgAppID, appSecret = myIgAppSecret) %>% +#' SaveCredential("instagramCred.RDS") %>% +#' Collect(ego = TRUE, username = listIgUsernames) %>% Create() +#' +#' ## load previously saved credential example +#' +#' LoadCredential("instagramCred.RDS") %>% +#' Collect(tag = "obama", distance = 5000, n = 100) %>% Create("bimodal") +#' } +#' +#' @export +SaveCredential <- function(credential, filename) { + if (missing(credential) || missing(filename)) { + stop("please supply a credential object and credential file name to save.") + } + saveRDS(credential, filename) + return(credential) +} + +#' @rdname SaveCredential +#' @export +LoadCredential <- function(filename) { + if (missing(filename)) { + stop("please supply a credential file name to load.") + } + credential <- readRDS(filename) + return(credential) +} diff --git a/vosonSML/R/TrimAt.R b/vosonSML/R/TrimAt.R deleted file mode 100644 index 76dd144..0000000 --- a/vosonSML/R/TrimAt.R +++ /dev/null @@ -1,6 +0,0 @@ -TrimAt <- -function(x) { - # remove @ from text - - sub('@', '', x) -} diff --git a/vosonSML/R/TrimHashtags.R b/vosonSML/R/TrimHashtags.R deleted file mode 100644 index 469f57c..0000000 --- a/vosonSML/R/TrimHashtags.R +++ /dev/null @@ -1,6 +0,0 @@ -TrimHashtags <- -function(x) { - # remove hashtags, i.e. "#tag", in a tweet - - str_replace_all(x, '(#[[:alnum:]_]*)', '') -} diff --git a/vosonSML/R/TrimHead.R b/vosonSML/R/TrimHead.R deleted file mode 100644 index f4d0477..0000000 --- a/vosonSML/R/TrimHead.R +++ /dev/null @@ -1,6 +0,0 @@ -TrimHead <- -function(x) { - # remove starting @, .@, RT @, MT @, etc. 
- - sub('^(.*)?@', '', x) -} diff --git a/vosonSML/R/TrimOddChar.R b/vosonSML/R/TrimOddChar.R deleted file mode 100644 index d7ef9ef..0000000 --- a/vosonSML/R/TrimOddChar.R +++ /dev/null @@ -1,11 +0,0 @@ -TrimOddChar <- -function(x) { - # remove odd charactors - iconv(x, to = 'utf-8') -} - -TrimOddCharMac <- -function(x) { - # remove odd charactors - iconv(x, to = 'utf-8-mac') -} diff --git a/vosonSML/R/TrimUrls.R b/vosonSML/R/TrimUrls.R deleted file mode 100644 index d84eea0..0000000 --- a/vosonSML/R/TrimUrls.R +++ /dev/null @@ -1,6 +0,0 @@ -TrimUrls <- -function(x) { - # remove urls in a tweet - - str_replace_all(x, 'http[^[:blank:]]+', '') -} diff --git a/vosonSML/R/TrimUsers.R b/vosonSML/R/TrimUsers.R deleted file mode 100644 index 801203f..0000000 --- a/vosonSML/R/TrimUsers.R +++ /dev/null @@ -1,6 +0,0 @@ -TrimUsers <- -function(x) { - # remove users, i.e. "@user", in a tweet - - str_replace_all(x, '(@[[:alnum:]_]*)', '') -} diff --git a/vosonSML/R/utils.R b/vosonSML/R/Utils.R similarity index 84% rename from vosonSML/R/utils.R rename to vosonSML/R/Utils.R index 7e8e46b..1cd862a 100644 --- a/vosonSML/R/utils.R +++ b/vosonSML/R/Utils.R @@ -28,8 +28,7 @@ systemTimeFilename <- function(name_suffix, name_ext, current_time = NULL, clean # write data to file as type writeOutputFile <- function(data, type, name, msg = TRUE) { - # package <- environmentName(environment(writeOutputFile)) - + if (missing(type)) { type <- "rds" } @@ -70,3 +69,18 @@ writeOutputFile <- function(data, type, name, msg = TRUE) { return(NULL) }) } + +# installs and loads a package if necessary +EnsurePackage <- function(x) { + x <- as.character(x) + if (!require(x, character.only = TRUE)) { + install.packages(pkgs = x, repos = "http://cran.r-project.org") + require(x, character.only = TRUE) + } +} + +quiet <-function(x) { + sink(tempfile()) + on.exit(sink()) + invisible(force(x)) +} diff --git a/vosonSML/R/UtilsTwitter.R b/vosonSML/R/UtilsTwitter.R new file mode 100644 index 0000000..3f91395 --- /dev/null +++ b/vosonSML/R/UtilsTwitter.R @@ -0,0 +1,180 @@ +# remove odd characters in tweets +RemoveOddChars <- function(df) { + df$text <- sapply(df$text, function(x) TrimOddChar(x)) + + return(df) +} + +# for each tweet, extract information related to users +# such as to_user, rt_user etc. +ExtractUserInfo <- function(df) { + + # extract to_user + df$reply_to <- sapply(df$text, function(tweet) TrimHead(str_extract(tweet,"^((\\.)?(@[[:alnum:]_+]*))"))) + + # extract any mentions at all (inc. replies, mentions, etc) + # this is a completely new approach - it 'vacuums' up ANY mentions + df$users_mentioned <- sapply(df$text, function(tweet) TrimHead(str_match_all(tweet,"@[[:alnum:]_+]*")[[1]])) + + # extract rt_user + df$retweet_from <- sapply(df$text, function(tweet) TrimHead(str_extract(tweet,"^[RM]T (@[[:alnum:]_+]*)"))) + + return(df) +} + +# for each tweet, extract any hashtags that a user has used +ExtractHashtagInfo <- function(df) { + + df$hashtags_used <- sapply(df$text, function(tweet) regmatches(tweet, gregexpr("#[^#\\s]+(?!\u2026)\\b", + tweet, perl = T))) + + # old way: + # TrimHead(str_match_all(tweet,"#[[:alnum:]_+]*")[[1]]) + + # new way: + # this matches hashtags, but not if the hashtag is "cut off" at the end of the tweet text, denoted by a 'trailing + # ellipsis' character. this avoids the problem of picking up erroneous hashtags that are cut off, e.g. "#ausp..." 
+ # when it should be "#auspol" + + # horizontalEllipsis <- "\u2026" + # horizontalEllipsisFixed <- stri_unescape_unicode(horizontalEllipsis) + + # patternRegex <- paste0("#[^#\\s]+(?!\\\u2026)\\b") + # TrimHead(str_match_all(tweet,paste0("#[[:alnum:]_+^",horizontalEllipsis,"$]*"))[[1]]) + + return(df) +} + +# for each tweet, extract url, remove it from the tweet, and put them separately in a new column +# todo: cannot deal with multiple urls in one tweet right now +ExtractUrls <- function(df) { + # EnsurePackage("stringr") + # EnsurePackage("grid") + + # extracts links (quick and dirty) + # wish to have something like http://daringfireball.net/2009/11/liberal_regex_for_matching_urls + df$links <- sapply(df$text,function(tweet) str_extract(tweet,("http[^[:blank:]]+"))) + df$text <- sapply(df$text, function(x) TrimUrls(x)) + + return(df) +} + +# remove odd characters in the user information attributes +# odd characters are especially problematic for search queries that trawl non-english speaking users/collectives +RemoveOddCharsUserInfo <- function(actorsInfoDF) { + + actorsInfoDF$screenName <- sapply(actorsInfoDF$screenName, function(x) TrimOddChar(x)) + actorsInfoDF$description <- sapply(actorsInfoDF$description, function(x) TrimOddChar(x)) + actorsInfoDF$url <- sapply(actorsInfoDF$url, function(x) TrimOddChar(x)) + actorsInfoDF$name <- sapply(actorsInfoDF$name, function(x) TrimOddChar(x)) + actorsInfoDF$location <- sapply(actorsInfoDF$location, function(x) TrimOddChar(x)) + actorsInfoDF$lang <- sapply(actorsInfoDF$lang, function(x) TrimOddChar(x)) + actorsInfoDF$profileImageUrl <- sapply(actorsInfoDF$profileImageUrl, function(x) TrimOddChar(x)) + + return(actorsInfoDF) +} + +# trim functions + +# remove users, i.e. "@user", in a tweet +TrimUsers <- function(x) { + str_replace_all(x, '(@[[:alnum:]_]*)', '') +} + +# remove urls in a tweet +TrimUrls <- function(x) { + str_replace_all(x, 'http[^[:blank:]]+', '') +} + +# remove odd charactors +TrimOddChar <- function(x) { + iconv(x, to = 'utf-8') +} + +# remove odd charactors +TrimOddCharMac <- function(x) { + iconv(x, to = 'utf-8-mac') +} + +# remove starting @, .@, RT @, MT @, etc. +TrimHead <- function(x) { + sub('^(.*)?@', '', x) +} + +# remove hashtags, i.e. 
"#tag", in a tweet +TrimHashtags <- function(x) { + str_replace_all(x, '(#[[:alnum:]_]*)', '') +} + +# remove @ from text +TrimAt <- function(x) { + sub('@', '', x) +} + +PreprocessTweets <- function(df) { + # Perform a few preprocessing tasks + + # removing odd characters + df.new <- RemoveOddChars(df) + # extract user info and add to df + df.new <- ExtractUserInfo(df.new) + # extract urls and add to df + df.new <- ExtractUrls(df.new) + + return(df.new) +} + +# accepts a df to add or increment a field value with count +networkStats <- function(df, field, count, edge, print) { + if (missing(print)) { + print <- FALSE + } + + if (missing(edge)) { + edge <- FALSE + } + + if (print == TRUE) { + if (!is.null(df) & nrow(df) > 0) { + lf <- lc <- 0 + for (i in 1:nrow(df)) { + lf <- ifelse(nchar(df$field[i]) > lf, nchar(df$field[i]), lf) + lc <- ifelse(nchar(df$count[i]) > lc, nchar(df$count[i]), lc) + } + + for (i in 1:nrow(df)) { + lfm <- lf + if (nchar(df$field[i]) != lf) { + lfm <-lf + 1 + } + line <- paste0(df$field[i], paste0(replicate(lfm - nchar(df$field[i]), ""), collapse = " "), " | ") + line <- paste0(line, df$count[i], paste0(replicate(lc - nchar(df$count[i]), ""), collapse = " "), "\n") + cat(line) + } + } + + return(TRUE) + } + + if (is.null(df)) { + df <- data.frame("field" = character(0), "count" = character(0), "edge_count" = character(0), + stringsAsFactors = FALSE) + } + df <- rbind(df, list(field = field, count = count, edge_count = edge), stringsAsFactors = FALSE) + + return(df) +} + +printTwitterRateLimit <- function(token) { + rtlimit <- rtweet::rate_limit(token, "search/tweets") + remaining <- rtlimit[["remaining"]] * 100 + reset <- rtlimit[["reset"]] + reset <- as.numeric(reset, "secs") + cat(paste0("remaining search num / 15 mins: ", remaining, "\n")) + cat(paste0("reset: ", reset, " secs\n")) +} + +getRemainingSearchNum <- function(token) { + rtlimit <- rtweet::rate_limit(token, "search/tweets") + remaining <- rtlimit[["remaining"]] * 100 +} \ No newline at end of file diff --git a/vosonSML/R/UtilsYoutube.R b/vosonSML/R/UtilsYoutube.R new file mode 100644 index 0000000..faf16b2 --- /dev/null +++ b/vosonSML/R/UtilsYoutube.R @@ -0,0 +1,84 @@ +SearchCommentsForMentions <- function(commentsTextCleaned, usernamesCleaned) { + + ptm <- proc.time() + + matchTemp <- lapply(commentsTextCleaned, function(x) { + tempResult <- lapply(usernamesCleaned, function(y) { + foo <- grep(paste("(\\+|\\@)", y, sep=""), x) + + if(length(foo) > 0) { + return(y) + } else { + return("FALSE") + } + }) + }) + + matchTemp <- unlist(matchTemp) + # matchTemp <- as.vector(matchTemp) + # matchTemp <- iconv(matchTemp, to = 'UTF-8') + + # have to split `matchTemp` into as many groups as there are rows (i.e. comment texts) + matchTemp2 <- split(matchTemp, ceiling(seq_along(matchTemp) / length(commentsTextCleaned))) + + # Now we want to retrieve the username with MAX CHARACTERS that was mentioned, or if all values were "FALSE" then + # just return a single "FALSE" value. + # THE REASON IS: + # If we have the following comment text: "+Timothy some text", and there are two users in the data, namely "Tim" + # and "Timothy", the `grep` will have matched both of these in the comment text. + # So, we want to ensure it takes the username with more characters (i.e. "Timothy"), rather than the subset + # match (i.e. "Tim"). 
+ matchTemp3 <- tryCatch({ + lapply(matchTemp2, function(x) { + + # if length of element is 0 then return FALSE + # if (length(x)==0) { + # return("FALSE") + # } + + # if all elements == "FALSE" then just return "FALSE" + if (length(x[which(x=="FALSE")]) == length(x)) { + # cat("\nAll elements of list slice are FALSE\n") + return("FALSE") + } + + # if all elements except one == "FALSE" then return the 'non false' element + # e.g. c("FALSE", "FALSE", "Timothy", "FALSE") --> returns "Timothy" + if (length(x[which(x != "FALSE")]) == 1) { + # cat("\nFound 1 non-false ELEMENT:\n") + # cat(paste0(x[which(x!="FALSE")],"\n")) + return(x[which(x != "FALSE")]) + + } else { + tempResult <- x[which(x != "FALSE")] + # if two duplicate results (e.g. "Timothy" and "Timothy"), then just return the 1st + tempResult <- x[which(nchar(x) == max(nchar(x)))][1] + + # cat("\nTwo or more results found:\n") + # cat(x[which(x != "FALSE")]) + # cat("\n") + return(tempResult) + # return(max(nchar(x))) + } + }) + }, error = function(err) { + # error handler picks up where error was generated + print(paste("\nI caught an error (are there mentions/replies between users in the comments for your video(s)? :\n", + err)) + return(matchTemp2) # if it catches an error, we just return the original object + }) # end tryCatch + + # debugResultDF <- data.frame(commentsTextCleaned, usernamesCleaned, unlist(matchTemp3)) # DEBUG + finalMatchesTemp <- as.vector(unlist(matchTemp3)) + + # convert back (or 'de-regex') the username characters + finalMatches <- gsub("\\\\","",finalMatchesTemp) + + # functionRunTime <- proc.time() - ptm # DEBUG + # print("Runtime of FindMentions function was:") # DEBUG + # flush.console() # DEBUG + # print(functionRunTime) # DEBUG + # flush.console() # DEBUG + + return (finalMatches) +} diff --git a/vosonSML/R/authenticateWithYoutubeAPI.R b/vosonSML/R/authenticateWithYoutubeAPI.R deleted file mode 100644 index af818f1..0000000 --- a/vosonSML/R/authenticateWithYoutubeAPI.R +++ /dev/null @@ -1,18 +0,0 @@ -#' YouTube API Authentication -#' -#' OAuth based authentication with the Google API. -#' -#' In order to collect data from YouTube, the user must first authenticate with Google's Application Programming -#' Interface (API). Users can obtain a Google Developer API key at: https://console.developers.google.com. -#' -#' @param apiKeyYoutube character string specifying your Google Developer API key. -#' -#' @return This is called for its side effect. -#' -#' @note In the future this function will enable users to save the API key in working directory, and the function will -#' automatically look for a locally stored key whenever it is called without apiKeyYoutube argument. 
-#' -#' @noRd -authenticateWithYoutubeAPI <- function(apiKeyYoutube) { - return(apiKeyYoutube) -} diff --git a/vosonSML/R/quiet.R b/vosonSML/R/quiet.R deleted file mode 100644 index 5c7ddec..0000000 --- a/vosonSML/R/quiet.R +++ /dev/null @@ -1,6 +0,0 @@ -quiet <- -function(x) { - sink(tempfile()) - on.exit(sink()) - invisible(force(x)) -} diff --git a/vosonSML/R/searchCommentsForMentions.R b/vosonSML/R/searchCommentsForMentions.R deleted file mode 100644 index 782bace..0000000 --- a/vosonSML/R/searchCommentsForMentions.R +++ /dev/null @@ -1,97 +0,0 @@ -searchCommentsForMentions <- -function (commentsTextCleaned,usernamesCleaned) { - - ptm <- proc.time() # Start the timer # DEBUG - - matchTemp <- lapply(commentsTextCleaned, function(x) { - - tempResult <- lapply(usernamesCleaned, function(y) { - - foo <- grep(paste("(\\+|\\@)", y, sep=""),x) - - if(length(foo)>0){ - return(y) - } - else { - return("FALSE") - } - - } - ) - } - ) - - matchTemp <- unlist(matchTemp) - # matchTemp <- as.vector(matchTemp) - # matchTemp <- iconv(matchTemp, to = 'UTF-8') - - # have to split `matchTemp` into as many groups as there are rows (i.e. comment texts) - matchTemp2 <- split(matchTemp, ceiling(seq_along(matchTemp)/length(commentsTextCleaned))) - - # Now we want to retrieve the username with MAX CHARACTERS that was mentioned, - # or if all values were "FALSE" then just return a single "FALSE" value. - # THE REASON IS: - # If we have the following comment text: "+Timothy some text", - # and there are two users in the data, namely "Tim" and "Timothy", - # the `grep` will have matched both of these in the comment text. - # So, we want to ensure it takes the username with more characters (i.e. "Timothy"), - # rather than the subset match (i.e. "Tim"). - - matchTemp3 <- tryCatch({ - - lapply(matchTemp2, function(x) { - - # if length of element is 0 then return FALSE - # if (length(x)==0) { - # return("FALSE") - # } - - # if all elements == "FALSE" then just return "FALSE" - if (length(x[which(x=="FALSE")])==length(x)) { - # cat("\nAll elements of list slice are FALSE\n") # DEBUG - return("FALSE") - } - - # if all elements except one == "FALSE" then return the 'non false' element - # e.g. c("FALSE", "FALSE", "Timothy", "FALSE") ---> returns "Timothy" - if (length(x[which(x!="FALSE")])==1){ - # cat("\nFound 1 non-false ELEMENT:\n") # DEBUG - # cat(paste0(x[which(x!="FALSE")],"\n")) # DEBUG - return(x[which(x!="FALSE")]) - } - - else { - tempResult <- x[which(x!="FALSE")] - tempResult <- x[which(nchar(x)==max(nchar(x)))][1] # if two duplicate results (e.g. "Timothy" and "Timothy"), then just return the 1st - # cat("\nTwo or more results found:\n") # DEBUG - # cat("\nTwo or more results found:\n") # DEBUG - # cat(x[which(x!="FALSE")]) - # cat("\n") - return(tempResult) - # return(max(nchar(x))) #DEBUG - } - }) - - }, error = function(err) { - - # error handler picks up where error was generated - print(paste("\nI caught an error (are there mentions/replies between users in the comments for your video(s)? 
:\n",err)) - return(matchTemp2) # if it catches an error, we just return the original object - - }) # END tryCatch - - # debugResultDF <- data.frame(commentsTextCleaned,usernamesCleaned,unlist(matchTemp3)) #DEBUG - finalMatchesTemp <- as.vector(unlist(matchTemp3)) - - # convert back (or 'de-regex') the username characters - finalMatches <- gsub("\\\\","",finalMatchesTemp) - - #functionRunTime <- proc.time() - ptm # DEBUG - #print("Runtime of FindMentions function was:") # DEBUG - #flush.console() # DEBUG - #print(functionRunTime) # DEBUG - #flush.console() # DEBUG - - return (finalMatches) - -} diff --git a/vosonSML/R/vosonSML-package.R b/vosonSML/R/vosonSML-package.R index 0fd95d3..12f175d 100644 --- a/vosonSML/R/vosonSML-package.R +++ b/vosonSML/R/vosonSML-package.R @@ -1,20 +1,20 @@ #' Collection and network analysis of social media data #' -#' The goal of the vosonSML package is to provide a suite of easy-to-use tools for collecting data from social media -#' sources (Instagram, Facebook, Twitter, Youtube, and Reddit) and generating different types of networks suited to -#' Social Network Analysis (SNA) and text analytics. It offers tools to create unimodal, multimodal, semantic, and -#' dynamic networks. It draws on excellent packages such as \pkg{twitteR}, \pkg{instaR}, \pkg{Rfacebook}, -#' \pkg{RedditExtractoR} and \pkg{igraph} in order to provide an integrated 'work flow' for collecting different types -#' of social media data and creating different types of networks out of these data. Creating networks from social media +#' The goal of the vosonSML package is to provide a suite of easy-to-use tools for collecting data from social media +#' sources (Instagram, Facebook, Twitter, Youtube, and Reddit) and generating different types of networks suited to +#' Social Network Analysis (SNA) and text analytics. It offers tools to create unimodal, multimodal, semantic, and +#' dynamic networks. It draws on excellent packages such as \pkg{rtweet}, \pkg{instaR}, \pkg{Rfacebook}, +#' \pkg{RedditExtractoR} and \pkg{igraph} in order to provide an integrated 'work flow' for collecting different types +#' of social media data and creating different types of networks out of these data. Creating networks from social media #' data is often non-trivial and time consuming. This package simplifies such tasks so users can focus on analysis. #' -#' vosonSML uses a straightforward S3 class system. Data collected with this package produces \code{data.table} objects -#' (extension of class \code{data.frame}), which are assigned the class \code{dataSource}. Additionally, -#' \code{dataSource} objects are assigned a class identifying the source of data, e.g. \code{facebook} or \code{youtube} -#' . In this way, \code{dataSource} objects are fast, easy to work with, and can be used as input to easily construct -#' different types of networks. For example, the function \code{\link{Collect}} can be used to collect Twitter data, -#' which is then 'piped' to the \code{\link{Create}} function, resulting in a network (an igraph object) that is ready -#' for analysis. +#' vosonSML uses a straightforward S3 class system. Data collected with this package produces \code{data.table} objects +#' (extension of class \code{data.frame}), which are assigned the class \code{dataSource}. Additionally, +#' \code{dataSource} objects are assigned a class identifying the source of data, e.g. \code{facebook} or +#' \code{youtube}. 
In this way, \code{dataSource} objects are fast, easy to work with, and can be used as input to +#' easily construct different types of networks. For example, the function \code{Collect} can be used to collect +#' Twitter data, which is then 'piped' to the \code{Create} function, resulting in a network (an igraph object) +#' that is ready for analysis. #' #' @name vosonSML-package #' @aliases vosonSML-package vosonSML @@ -29,17 +29,18 @@ #' @import methods #' @import httr #' @importFrom Hmisc escapeRegex -#' @importFrom igraph delete.vertices graph.data.frame simplify write.graph V 'V<-' set.graph.attribute +#' @importFrom igraph delete.vertices graph.data.frame simplify write.graph V 'V<-' set.graph.attribute vcount #' graph_from_data_frame delete_vertex_attr set_graph_attr #' @importFrom Rfacebook fbOAuth getPost getPage getUsers #' @importFrom instaR getComments getLikes instaOAuth searchInstagram getUser getFollowers getFollows #' @importFrom plyr ldply -#' @importFrom twitteR lookupUsers searchTwitter setup_twitter_oauth twListToDF +#' @importFrom rtweet create_token rate_limit search_tweets users_data lookup_users #' @importFrom stringr str_extract str_replace_all str_match_all #' @importFrom stats 'na.omit' #' @importFrom utils "flush.console" head "install.packages" "read.table" "write.csv" "read.csv" #' @importFrom RedditExtractoR reddit_content user_network -#' @importFrom magrittr '%>%' +#' @importFrom magrittr '%>%' '%<>%' #' @importFrom dplyr rename group_by summarise ungroup left_join select mutate filter coalesce row_number +#' distinct anti_join mutate_all mutate_at ends_with vars funs #' @importFrom rlang '.data' NULL diff --git a/vosonSML/man/Authenticate.Rd b/vosonSML/man/Authenticate.Rd index 7affa01..536e0c7 100644 --- a/vosonSML/man/Authenticate.Rd +++ b/vosonSML/man/Authenticate.Rd @@ -2,67 +2,71 @@ % Please edit documentation in R/Authenticate.R \name{Authenticate} \alias{Authenticate} -\title{Create credential to access social media APIs} +\title{Create a credential to access social media APIs} \usage{ Authenticate(socialmedia, ...) } \arguments{ -\item{socialmedia}{character string, social media API to authenticate, -currently supports "facebook", "youtube", "twitter", "instagram" and "reddit"} +\item{socialmedia}{Character string. Identifier for social media API to authenticate.\cr +Supports: \code{"twitter"}, \code{"youtube"}, \code{"reddit"}, \code{"instagram"} and \code{"facebook"}.} -\item{...}{additional parameters for authentication -\code{facebook}: appID, appSecret -\code{youtube}: apiKey -\code{twitter}: apiKey, apiSecret, accessToken, accessTokenSecret -\code{instagram}: appID, appSecret -\code{reddit}: appName, appKey, appSecret, useTokenCache} +\item{...}{Additional parameters for authentication appropriate to \code{socialmedia} identifier. +\describe{ + \item{twitter:}{\code{[appName], apiKey, apiSecret, accessToken, + accessTokenSecret, [useCachedToken]}} + \item{youtube:}{\code{apiKey}} + \item{reddit:}{\code{[appName], appKey, appSecret, [useCachedToken]}} + \item{instagram:}{\code{appID, appSecret, [useCachedToken]}} + \item{facebook:}{\code{appID, appSecret, [extendedPermissions, useCachedToken]}} +}} } \value{ -credential object with authentication information +A \code{credential} object with authentication information. } \description{ -\code{Authenticate} creates a \code{credential} object that enables R to -make authenticated calls to social media APIs. 
A \code{credential} object -is a S3 object with the authentication-related information such as access -tokens and the information on the social media that grant authentication. -\code{Authenticate} is the first step of the \code{Authenticate}, -\code{Collect}, \code{Create} workflow. +\code{Authenticate} creates a \code{credential} object that enables R to make authenticated calls to social media +APIs. A \code{credential} object is a S3 object with the authentication-related information such as access tokens +and the information on the social media that grant authentication. \code{Authenticate} is the first step of the +\code{Authenticate}, \code{\link{Collect}} and \code{\link{Create}} workflow. } \note{ -Currently, \code{Authenticate} with socialmedia = "twitter" generates -oauth information to be used in the current active session only (i.e. -"side-effect") and no authentication-related information will be stored in -the returned \code{credential} object. +Currently, \code{Authenticate} with \code{socialmedia = "twitter"} generates OAuth information to be used in +the current active session only (i.e. "side-effect") and no authentication-related information will be stored in the +returned \code{credential} object. + +For other social network API's it's useful to cache the credential to a file and then re-use it in future sessions. +Refer to \code{\link{SaveCredential}} and \code{\link{LoadCredential}} to do this. } \examples{ - \dontrun{ require(magrittr) -## Instagram ego network example -myAppID <- "123456789098765" -myAppSecret <- "abc123abc123abc123abc123abc123ab" -myUsernames <- c("senjohnmccain","obama") -Authenticate("instagram", -appID = myAappId, -appSecret = myAppSecret) \%>\% Collect(ego = TRUE, -username = myUsernames) \%>\% Create +## youtube actor network example + +myYoutubeAPIKey <- "xxxxxxxxxxxxxxxxxxxxxx" +listYoutubeVideoIDs <- c("W2GZFeYGU3s", "mL27TAJGlWc") + +myActorNetwork <- Authenticate("youtube", apiKey = myYoutubeAPIKey) \%>\% + Collect(videoIDs = listYoutubeVideoIDs) \%>\% Create("actor") -## YouTube actor network example -my_apiKeyYoutube <- "314159265358979qwerty" -videoIDs <- c("W2GZFeYGU3s","mL27TAJGlWc") +## instagram ego network example -Authenticate("youtube", -apiKey = my_apiKeyYoutube) \%>\% Collect(videoIDs = videoIDs) \%>\% Create('actor') +myInstaAppID <- "xxxxxxxxxxx" +myInstaAppSecret <- "xxxxxxxxxxxxxxxxxxxxxx" +listInstaUsernames <- c("senjohnmccain", "obama") + +myEgoNetwork <- Authenticate("instagram", appID = myInstaAppID, appSecret = myInstaAppSecret) \%>\% + Collect(ego = TRUE, username = listInstaUsernames) \%>\% Create("ego") } + } \seealso{ -\code{\link{AuthenticateWithFacebookAPI}}, -\code{\link{AuthenticateWithInstagramAPI}}, -\code{\link{AuthenticateWithTwitterAPI}}, -\code{\link{SaveCredential}}, -\code{\link{LoadCredential}} -} -\author{ -Chung-hong Chan +\code{\link{SaveCredential}}, \code{\link{Collect}}, \code{\link{Create}} } +\keyword{authenticate} +\keyword{credential} +\keyword{facebook} +\keyword{instagram} +\keyword{reddit} +\keyword{twitter} +\keyword{youtube} diff --git a/vosonSML/man/AuthenticateWithRedditAPI.Rd b/vosonSML/man/AuthenticateWithRedditAPI.Rd index db949a2..30a1eac 100644 --- a/vosonSML/man/AuthenticateWithRedditAPI.Rd +++ b/vosonSML/man/AuthenticateWithRedditAPI.Rd @@ -4,7 +4,7 @@ \alias{AuthenticateWithRedditAPI} \title{Reddit API authentication.} \usage{ -AuthenticateWithRedditAPI(appName, appKey, appSecret, useTokenCache) +AuthenticateWithRedditAPI(appName, appKey, appSecret, useCachedToken) } 
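For illustration, a minimal sketch of how this reddit authentication might be driven through the \code{Authenticate} wrapper and cached for later sessions, following the pattern of the package's other examples (the app name, keys and file name are placeholders):

require(magrittr)
myRedditAuth <- Authenticate("reddit", appName = "vosonSMLApp",
                             appKey = "xxxxxxxxxxxxxxxxxxxxxx",
                             appSecret = "xxxxxxxxxxxxxxxxxxxxxx") %>%
  SaveCredential("RedditCredential.RDS")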
\arguments{ \item{appName}{character string containing the reddit app name associated with the API key.} @@ -13,7 +13,7 @@ AuthenticateWithRedditAPI(appName, appKey, appSecret, useTokenCache) \item{appSecret}{character string containing the app secret.} -\item{useTokenCache}{logical. Use cached authentication token if found.} +\item{useCachedToken}{logical. Use cached authentication token if found.} } \value{ a reddit authentication token diff --git a/vosonSML/man/AuthenticateWithTwitterAPI.Rd b/vosonSML/man/AuthenticateWithTwitterAPI.Rd index 478f599..9673c1f 100644 --- a/vosonSML/man/AuthenticateWithTwitterAPI.Rd +++ b/vosonSML/man/AuthenticateWithTwitterAPI.Rd @@ -2,66 +2,39 @@ % Please edit documentation in R/AuthenticateWithTwitterAPI.R \name{AuthenticateWithTwitterAPI} \alias{AuthenticateWithTwitterAPI} -\title{Note: this function is DEPRECATED and will be removed in a future release. -Please use the \code{Authenticate} function} +\title{Note: this function is DEPRECATED. Please use the \code{\link{Authenticate}} function.} \usage{ -AuthenticateWithTwitterAPI(api_key, api_secret, access_token, - access_token_secret, createToken) +AuthenticateWithTwitterAPI(appName, apiKey, apiSecret, accessToken, + accessTokenSecret, useCachedToken) } \arguments{ -\item{api_key}{character string specifying the 'API key' used for -authentication.} +\item{appName}{Character string. Specifies the twitter registered app name associated with API keys.} -\item{api_secret}{character string specifying the 'API secret' used for -authentication.} +\item{apiKey}{Character string. Specifies the app 'API key' used for authentication.} -\item{access_token}{character string specifying the 'access token' used for -authentication.} +\item{apiSecret}{Character string. Specifies the app 'API secret'.} -\item{access_token_secret}{character string specifying the 'access token -secret' used for authentication.} +\item{accessToken}{Character string. Specifies the app 'access token'.} -\item{createToken}{logical. !! NOT PROPERLY IMPLEMENTED YET.} +\item{accessTokenSecret}{Character string. Specifies the app 'access token secret'.} + +\item{useCachedToken}{Logical. If \code{TRUE} uses cached API token if found otherwise creates one.} } \value{ -This is called for its side effect. +twitter_oauth. Returns a twitter oauth token object. } \description{ -Twitter API Authentication +Twitter API authentication } \details{ -Oauth based authentication with the Twitter API - -In order to collect data from Twitter, the user must first authenticate with -Twitter's Application Programming Interface (API). +Oauth based authentication using the Twitter API. -This requires setting up an App on Twitter. An excellent guide to achieving -this can be found at: -http://thinktostart.com/twitter-authentification-with-r/ -} -\examples{ - -\dontrun{ - # Firstly specify your API credentials - my_api_key <- "1234567890qwerty" - my_api_secret <- "1234567890qwerty" - my_access_token <- "1234567890qwerty" - my_access_token_secret <- "1234567890qwerty" - - AuthenticateWithTwitterAPI(api_key=my_api_key, api_secret=my_api_secret, - access_token=my_access_token, access_token_secret=my_access_token_secret) -} +In order to collect data from Twitter, the user must first authenticate with Twitter's API. This requires setting up +an app on Twitter. 
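For illustration, once an app has been registered, the updated authentication call via the \code{Authenticate} wrapper might look like the following sketch (the app name and all key values are placeholders):

myTwitterAuth <- Authenticate("twitter", appName = "vosonSMLApp",
                              apiKey = "xxxxxxxxxxxxxxxxxxxxxx",
                              apiSecret = "xxxxxxxxxxxxxxxxxxxxxx",
                              accessToken = "xxxxxxxxxxxxxxxxxxxxxx",
                              accessTokenSecret = "xxxxxxxxxxxxxxxxxxxxxx",
                              useCachedToken = TRUE)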
A useful guide to creating an app can be found in the rtweet documentation: +https://rtweet.info/articles/auth.html#creating-a-twitter-app } \seealso{ -\code{AuthenticateWithFacebookAPI} and -\code{AuthenticateWithYouTubeAPI} for other ways to collect social media -data. -} -\author{ -Timothy Graham & Robert Ackland - +\code{\link{Authenticate}} } -\keyword{SNA} -\keyword{media} -\keyword{social} +\keyword{authenticate} \keyword{twitter} diff --git a/vosonSML/man/AuthenticateWithYoutubeAPI.Rd b/vosonSML/man/AuthenticateWithYoutubeAPI.Rd new file mode 100644 index 0000000..ea335a8 --- /dev/null +++ b/vosonSML/man/AuthenticateWithYoutubeAPI.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AuthenticateWithYoutubeAPI.R +\name{AuthenticateWithYoutubeAPI} +\alias{AuthenticateWithYoutubeAPI} +\title{YouTube API Authentication} +\usage{ +AuthenticateWithYoutubeAPI(apiKey) +} +\arguments{ +\item{apiKey}{character string specifying your Google Developer API key.} +} +\value{ +This is called for its side effect. +} +\description{ +OAuth based authentication with the Google API. +} +\details{ +In order to collect data from YouTube, the user must first authenticate with Google's Application Programming +Interface (API). Users can obtain a Google Developer API key at: https://console.developers.google.com. +} +\note{ +In the future this function will enable users to save the API key in working directory, and the function will +automatically look for a locally stored key whenever it is called without apiKeyYoutube argument. +} diff --git a/vosonSML/man/Collect.Rd b/vosonSML/man/Collect.Rd index dbc207e..094581c 100644 --- a/vosonSML/man/Collect.Rd +++ b/vosonSML/man/Collect.Rd @@ -7,66 +7,81 @@ Collect(credential, ego = FALSE, ...) } \arguments{ -\item{credential}{\code{credential} object generated from -\code{Authenticate}} +\item{credential}{A \code{credential} object generated from \code{Authenticate}.} -\item{ego}{logical, collecting ego network data. Currently only support -Instagram.} +\item{ego}{Logical. If \code{TRUE} collect ego network data. Currently only supports Instagram.} -\item{...}{additional parameters for data collection (refer to -CollectDataFrom* and CollectEgo* functions) - -\code{facebook}: pageName, rangeFrom, rangeTo, verbose, n, writeToFile, dynamic -\code{youtube}: videoIDs, verbose, writeToFile, maxComments -\code{twitter}: searchTerm, numTweets, verbose, writeToFile, language -\code{instagram}: credential, tag, n, lat, lng, distance, folder, mindate, maxdate, verbose, sleep, writeToFile, -waitForRateLimit -\code{reddit}: threadUrls, waitTime, writeToFile - -\code{instagram} with \code{ego} = TRUE: username, userid, verbose, -degreeEgoNet, waitForRateLimit, getFollows} +\item{...}{Additional parameters for data collection by appropriate to credential \code{socialmedia} type. +Refer to CollectDataFrom* and CollectEgo* functions for more details. 
+\describe{ + \item{twitter:}{\code{authToken, searchTerm, [searchType, numTweets, includeRetweets, retryOnRateLimit,}\cr + \code{writeToFile, verbose, ...]}} + \item{youtube:}{\code{videoIDs, apiKeyYoutube, [verbose, writeToFile, maxComments]}} + \item{reddit:}{\code{threadUrls, [waitTime, writeToFile]}} + \item{instagram:}{\code{tag, n, lat, lng, [distance, folder, mindate, maxdate, verbose, sleep,}\cr + \code{writeToFile, waitForRateLimit, credential]}} + \item{instagram with \code{ego = TRUE}:}{\code{username, userid, [verbose, degreeEgoNet,}\cr + \code{waitForRateLimit, getFollows, credential]}} + \item{facebook:}{\code{pageName, [rangeFrom, rangeTo, verbose, n, writeToFile, dynamic]}} +}} } \value{ -A data.frame object of class \code{dataSource.*} that can be used -with \code{Create}. +A data.frame object of class \code{dataSource.*} that can be used with \code{Create}. } \description{ -This function collects data from social media APIs, and structures the data -into a data frame of class \code{dataSource.*}, ready for creating networks -for further analysis. \code{Collect} is the second step of the -\code{Authenticate}, \code{Collect}, \code{Create} workflow. This function is -a convenient UI wrapper to the core CollectDataFrom* family of functions. +This function collects data from social media APIs, and structures the data into a data frame of class +\code{dataSource.*}, ready for creating networks for further analysis. \code{Collect} is the second step of the +\code{Authenticate}, \code{Collect}, \code{Create} workflow. This function is a convenient UI wrapper to the core +CollectDataFrom* family of functions. } \examples{ - \dontrun{ require(magrittr) -## Instagram ego network example -myAppID <- "123456789098765" -myAppSecret <- "abc123abc123abc123abc123abc123ab" -myUsernames <- c("senjohnmccain","obama") -Authenticate("instagram", -appID = myAappId, -appSecret = myAppSecret) \%>\% Collect(ego = TRUE, -username = myUsernames) \%>\% Create +## youtube actor network example + +myYoutubeAPIKey <- "xxxxxxxxxxxxxxxxxxxxxx" +listYoutubeVideoIDs <- c("W2GZFeYGU3s", "mL27TAJGlWc") + +myActorNetwork <- Authenticate("youtube", apiKey = myYoutubeAPIKey) \%>\% + Collect(videoIDs = listYoutubeVideoIDs) \%>\% Create("actor") + +## instagram ego network example -## YouTube actor network example -my_apiKeyYoutube <- "314159265358979qwerty" -videoIDs <- c("W2GZFeYGU3s","mL27TAJGlWc") +myInstaAppID <- "xxxxxxxxxxx" +myInstaAppSecret <- "xxxxxxxxxxxxxxxxxxxxxx" +listInstaUsernames <- c("senjohnmccain", "obama") -Authenticate("youtube", -apiKey = my_apiKeyYoutube) \%>\% Collect(videoIDs = videoIDs) \%>\% Create('actor') +myEgoNetwork <- Authenticate("instagram", appID = myInstaAppID, appSecret = myInstaAppSecret) \%>\% + Collect(ego = TRUE, username = listInstaUsernames) \%>\% Create("ego") + +## facebook bimodal network example + +myFacebookAppID <- "xxxxxxxxxxx" +myFacebookAppSecret <- "xxxxxxxxxxxxxxxxxxxxxx" + +myBimodalNetwork <- Authenticate("Facebook", appID = myFacebookAppID, + appSecret = myFacebookAppSecret) \%>\% + SaveCredential("FBCredential.RDS") \%>\% + Collect(pageName = "StarWars", rangeFrom = "2015-03-01", rangeTo = "2015-03-02", + writeToFile = FALSE) \%>\% + Create("bimodal") + +## facebook dynamic network example + +myDynamicNetwork <- LoadCredential("FBCredential.RDS") \%>\% + Collect(pageName = "StarWars", rangeFrom = "2015-03-01", rangeTo = "2015-03-02", + writeToFile = FALSE) \%>\% + Create("dynamic") } } \seealso{ -\code{CollectDataFacebook}, -\code{CollectDataInstagram}, 
-\code{CollectDataTwitter}, -\code{CollectEgoInstagram}, -\code{CollectDataReddit}, -} -\author{ -Chung-hong Chan +\code{Authenticate}, \code{Create} } +\keyword{collect} +\keyword{facebook} +\keyword{instagram} +\keyword{reddit} +\keyword{twitter} +\keyword{youtube} diff --git a/vosonSML/man/CollectDataReddit.Rd b/vosonSML/man/CollectDataReddit.Rd index 028db5e..37ed7d1 100644 --- a/vosonSML/man/CollectDataReddit.Rd +++ b/vosonSML/man/CollectDataReddit.Rd @@ -4,7 +4,7 @@ \alias{CollectDataReddit} \title{Collect reddit thread data} \usage{ -CollectDataReddit(threadUrls, waitTime = 5, writeToFile) +CollectDataReddit(threadUrls, waitTime = 5, writeToFile = FALSE) } \arguments{ \item{threadUrls}{character string vector. Reddit thread url's to collect data from.} diff --git a/vosonSML/man/CollectDataTwitter.Rd b/vosonSML/man/CollectDataTwitter.Rd index abaf665..8db42c3 100644 --- a/vosonSML/man/CollectDataTwitter.Rd +++ b/vosonSML/man/CollectDataTwitter.Rd @@ -2,140 +2,73 @@ % Please edit documentation in R/CollectDataTwitter.R \name{CollectDataTwitter} \alias{CollectDataTwitter} -\title{Note: this function is DEPRECATED and will be removed in a future release. -Please use the \code{Collect} function} +\title{Note: this function is DEPRECATED. Please use the \code{\link{Collect}} function.} \usage{ -CollectDataTwitter(searchTerm, numTweets, verbose, writeToFile, language, - since, until, locale, geocode, sinceID, maxID, resultType, - retryOnRateLimit) +CollectDataTwitter(authToken = NULL, searchTerm = "", + searchType = "recent", numTweets = 100, includeRetweets = TRUE, + retryOnRateLimit = FALSE, writeToFile = FALSE, verbose = FALSE, + ...) } \arguments{ -\item{searchTerm}{character string, specifying a search term or phrase (e.g. -"Australian politics") or hashtag (e.g. "#auspol"). Many query operators are -available - see the Twitter documentation for more information: -https://dev.twitter.com/rest/public/search} - -\item{numTweets}{numeric integer, specifying how many tweets to be -collected. Defaults to 1500. Maximum tweets for a single call of this -function is 1500.} - -\item{verbose}{logical. If \code{TRUE} then this function will output -runtime information to the console as it computes. Useful diagnostic tool -for long computations. Default is \code{FALSE}.} - -\item{writeToFile}{logical. If \code{TRUE} then the data is saved to file in -current working directory (CSV format), with filename denoting current -system time and \code{searchTerm}. Default is \code{FALSE}.} +\item{authToken}{Twitter oauth token created by rtweet.} -\item{language}{character string, restricting tweets to the given language, -given by an ISO 639-1 code. For example, "en" restricts to English tweets. -Defaults to NULL.} - -\item{since}{If not NULL, restricts tweets to those since the given date. Date is to be formatted -as YYYY-MM-DD (this is a wrapper to the searchTwitter function in the twitteR package).} +\item{searchTerm}{Character string. Specifies a search term or phrase (e.g. "Australian politics") or hashtag (e.g. +"#auspol"). Many query operators are available - see the Twitter documentation for more information: +https://dev.twitter.com/rest/public/search} -\item{until}{If not NULL, restricts tweets to those up until the given date. Date is to be formatted -as YYYY-MM-DD (this is a wrapper to the searchTwitter function in the twitteR package).} +\item{searchType}{Character string. Returns filtered tweets as per search type \code{recent}, \code{mixed} or +\code{popular}. 
Default type is \code{recent}.} -\item{locale}{If not NULL, will set the locale for the search. As of 03/06/11 only ja is effective, -as per the Twitter API (this is a wrapper to the searchTwitter function in the twitteR package).} +\item{numTweets}{Numeric. Specifies how many tweets to be collected. Defaults is \code{100}.} -\item{geocode}{If not NULL, returns tweets by users located within a given radius of the given -latitude/longitude. (this is a wrapper to the searchTwitter function in the twitteR package).} +\item{includeRetweets}{Logical. Specifies if the search should filter out retweets. Defaults is \code{TRUE}.} -\item{sinceID}{If not NULL, returns tweets with IDs greater (ie newer) than the specified ID -(this is a wrapper to the searchTwitter function in the twitteR package).} +\item{retryOnRateLimit}{Logical. Default is \code{FALSE}.} -\item{maxID}{If not NULL, returns tweets with IDs smaller (ie older) than the specified ID -(this is a wrapper to the searchTwitter function in the twitteR package).} +\item{writeToFile}{Logical. If \code{TRUE} then the data is saved to file in current working directory (RDS format), +with filename denoting current system time and \code{searchTerm}. Default is \code{FALSE}.} -\item{resultType}{If not NULL, returns filtered tweets as per value. See details for allowed values. -(this is a wrapper to the searchTwitter function in the twitteR package).} +\item{verbose}{Logical. If \code{TRUE} then this function will output runtime information to the console as it +computes. Useful diagnostic tool for long computations. Default is \code{FALSE}.} -\item{retryOnRateLimit}{If non-zero the search command will block retry up to X times if the rate limit -is experienced. This might lead to a much longer run time but the task will -eventually complete if the retry count is high enough (this is a wrapper to the searchTwitter -function in the twitteR package).} +\item{...}{Additional parameters to pass to the rtweet \code{search_tweets} function.} } \value{ -A data frame object of class \code{dataSource.twitter} that can be -used for creating unimodal networks (\code{CreateActorNetwork}), bimodal -networks (\code{CreateBimodalNetwork}), and semantic networks +A data frame object of class \code{dataSource.twitter} that can be used for creating unimodal networks +(\code{CreateActorNetwork}), bimodal networks (\code{CreateBimodalNetwork}), and semantic networks (\code{CreateSemanticNetwork}). } \description{ Collect data from Twitter for generating different types of networks } \details{ -This function collects data from Twitter based on hashtags or search terms, -and structures the data into a data frame of class -\code{dataSource.twitter}, ready for creating networks for further analysis. +This function collects data from Twitter based on hashtags or search terms, and structures the data into a data +frame of class \code{dataSource.twitter}, ready for creating networks for further analysis. -\code{CollectDataTwitter} collects public 'tweets' from Twitter using the -Twitter API. +\code{CollectDataTwitter} collects public 'tweets' from Twitter using the Twitter API. -The function then finds and maps the relationships of entities of interest -in the data (e.g. 
users, terms, hashtags), and structures these -relationships into a data frame format suitable for creating unimodal -networks (\code{CreateActorNetwork}), bimodal networks -(\code{CreateBimodalNetwork}), and semantic networks +The function then finds and maps the relationships of entities of interest in the data (e.g. users, terms, hashtags) +, and structures these relationships into a data frame format suitable for creating unimodal networks +(\code{CreateActorNetwork}), bimodal networks (\code{CreateBimodalNetwork}), and semantic networks (\code{CreateSemanticNetwork}). -The maximum number of tweets for a single call of \code{CollectDataTwitter} -is 1500. +The maximum number of tweets for a single call of \code{CollectDataTwitter} is 1500. -Language support is available, using the \code{language} argument. The user -can restrict tweets returned to a particular language, using the ISO 639-1 -code. For example, restricting to English would use \code{language="en"}. -The full list of codes is available here: -https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes. +Language support is available, using the \code{language} parameter. The user can restrict tweets returned to a +particular language, using the ISO 639-1 code. For example, restricting to English would use \code{language="en"}. +The full list of codes is available here: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes. -A variety of query operators are available through the Twitter API. For -example, "love OR hate" returns any tweets containing either term (or both). -For more information see the Twitter API documentation (under the heading +A variety of query operators are available through the Twitter API. For example, "love OR hate" returns any tweets +containing either term (or both). For more information see the Twitter API documentation (under the heading 'Query Operators'): https://dev.twitter.com/rest/public/search } \note{ -Data generated using this function is *not* suitable for dynamic -networks. Dynamic Twitter networks are not currently implemented in the -vosonSML package. This will be implemented in a future release. -} -\examples{ +Supported network types: \code{actor}, \code{bimodal}, \code{semantic} -\dontrun{ - # Firstly specify your API credentials - my_api_key <- "1234567890qwerty" - my_api_secret <- "1234567890qwerty" - my_access_token <- "1234567890qwerty" - my_access_token_secret <- "1234567890qwerty" - - # Authenticate with the Twitter API using \\code{AuthenticateWithTwitterAPI} - AuthenticateWithTwitterAPI(api_key=my_api_key, api_secret=my_api_secret, - access_token=my_access_token, access_token_secret=my_access_token_secret) - - # Collect tweets data using \\code{myTwitterData} - myTwitterData <- CollectDataTwitter(searchTerm="#auspol", - numTweets=150,writeToFile=FALSE,verbose=FALSE) - - # Create an 'actor' network using \\code{CreateActorNetwork} - g_actor_twitter <- CreateActorNetwork(myTwitterData) - - # Create a 'bimodal' network using \\code{CreateBimodalNetwork} - g_bimodal_twitter <- CreateBimodalNetwork(myTwitterData) - - # Create a 'semantic' network using \\code{CreateSemanticNetwork} - g_semantic_twitter <- CreateSemanticNetwork(myTwitterData) - } +Data generated using this function is *not* suitable for dynamic networks. } \seealso{ -\code{AuthenticateWithTwitterAPI} must be run first or no data will -be collected. 
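Continuing the authentication sketch above, a sketch of the updated rtweet-based collection call using the parameters documented for this function (search term and settings are illustrative):

# collect recent tweets for a hashtag, excluding retweets
myTwitterData <- Collect(myTwitterAuth, searchTerm = "#auspol", searchType = "recent",
                         numTweets = 150, includeRetweets = FALSE,
                         retryOnRateLimit = FALSE, writeToFile = FALSE, verbose = TRUE)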
-} -\author{ -Timothy Graham & Robert Ackland - +\code{Collect} } -\keyword{SNA} -\keyword{data} -\keyword{mining} +\keyword{collect} \keyword{twitter} diff --git a/vosonSML/man/CollectDataYoutube.Rd b/vosonSML/man/CollectDataYoutube.Rd new file mode 100644 index 0000000..52a296c --- /dev/null +++ b/vosonSML/man/CollectDataYoutube.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CollectDataYoutube.R +\name{CollectDataYoutube} +\alias{CollectDataYoutube} +\title{Collect YouTube comments data for generating different types of networks} +\usage{ +CollectDataYoutube(apiKey, videoIDs, verbose = FALSE, + writeToFile = FALSE, maxComments = 1e+13) +} +\arguments{ +\item{apiKey}{character string, specifying the Google Developer API Key used for authentication.} + +\item{videoIDs}{character vector, specifying one or more YouTube video IDs. For example, if the video URL is +'https://www.youtube.com/watch?v=W2GZFeYGU3s', then use videoIDs='W2GZFeYGU3s'. For multiple videos, the +function GetYoutubeVideoIDs can be used to create a vector object suitable as input for videoIDs.} + +\item{verbose}{logical. If TRUE then this function will output runtime information to the console as it +computes. Useful diagnostic tool for long computations. Default is FALSE.} + +\item{writeToFile}{logical. If TRUE then the data is saved to file in current working directory (CSV format), +with filename denoting current system time. Default is FALSE.} + +\item{maxComments}{numeric integer, specifying how many 'top-level' comments to collect from each video. This value +*does not* take into account 'reply' comments (i.e. replies to top-level comments), therefore the total number of +comments collected may be higher than maxComments. By default this function attempts to collect all comments.} +} +\value{ +A dataframe object of class dataSource.youtube that can be used for creating unimodal networks +(CreateActorNetwork). +} +\description{ +This function collects YouTube comments data for one or more YouTube videos. It structures the data into a data +frame of class dataSource.youtube, ready for creating networks for further analysis. +} +\details{ +CollectDataYoutube collects public comments from YouTube videos, using the YouTube API. + +The function then finds and maps the relationships of YouTube users who have interacted with each other +(i.e. user i has replied to user j or mentioned user j in a comment) and structures these relationships into a data +frame format suitable for creating unimodal networks (CreateActorNetwork). + +For multiple videos, the user may wish to use the function GetYoutubeVideoIDs, which creates a character +vector of video IDs from a plain text file of YouTube video URLs, which can then be used for the videoIDs +argument of the function CollectDataYoutube. +} +\note{ +Currently supported network types: unimodal 'actor' network; CreateActorNetwork. + +Data generated using this function is *not* suitable for dynamic networks. +Dynamic YouTube comments networks are not currently implemented in the vosonSML package. This will be implemented in +a future release. + +Note on maxComments argument: Due to quirks/specifications of the Google API, it is currently not possible to +specify the exact number of comments to return from the API using maxResults argument (i.e.including comments +that are replies to top-level comments). 
Therefore, the number of comments collected is usually somewhat greater than +maxComments, if a value is specified for this argument. For example, if a video contains 10 top-level +comments, and one of these top-level comments has 5 'child' or reply comments, then the total number of comments +collected will be equal to 15. Currently, the user must 'guesstimate' the maxResults value, to collect a +number of comments in the order of what they require. +} diff --git a/vosonSML/man/Create.Rd b/vosonSML/man/Create.Rd index 6442628..9d39fcb 100644 --- a/vosonSML/man/Create.Rd +++ b/vosonSML/man/Create.Rd @@ -7,49 +7,32 @@ Create(dataSource, type = "actor", ...) } \arguments{ -\item{dataSource}{a data frame of class \code{dataSource}} +\item{dataSource}{Social media data collected using the \code{Collect} method.} -\item{type}{character, type of network to be created, currently supports "actor", "bimodal", "dynamic", "semantic" -and "ego"} +\item{type}{Character string. Type of network to be created, can be \code{actor}, \code{bimodal}, +\code{dynamic}, \code{semantic} or \code{ego}.} -\item{...}{additional parameters for create*Network functions} +\item{...}{Additional parameters for network creation for appropriate \code{social media} and network \code{type}. +Refer to S3 methods \code{social media} type for default parameters.} } \value{ -an igraph graph object +Network data containing an igraph object. } \description{ -This function creates networks from social media data (i.e. from data frames of class \code{dataSource}. -\code{Create} is the final step of the \code{Authenticate}, \code{Collect}, \code{Create} workflow. This function is -a convenient UI wrapper to the core create*Network family of functions. -} -\details{ -Note: when creating Twitter networks, the user information can be collected separately using the -\code{\link{PopulateUserInfo}} function and stored into the network as vertex attributes (this involves additional -calls to the Twitter API). -} -\examples{ -\dontrun{ -require(magrittr) - -## instagram ego network example - -my_app_id <- "123456789098765" -my_app_secret <- "abc123abc123abc123abc123abc123ab" -my_usernames <- c("senjohnmccain", "obama") - -my_ego_network <- Authenticate("instagram", appID = my_app_id, appSecret = my_app_secret) \%>\% - Collect(ego = TRUE, username = my_usernames) \%>\% Create - -## youtube actor network example - -my_api_key <- "314159265358979qwerty" -my_video_ids <- c("W2GZFeYGU3s","mL27TAJGlWc") - -my_actor_network <- Authenticate("youtube", apiKey = my_api_key) \%>\% - Collect(videoIDs = my_video_ids) \%>\% Create('actor') - -} -} -\author{ -Chung-hong Chan -} +This function creates networks from social media data (i.e. collected from dataframes of class \code{social media}). +\code{Create} is the final step of the \code{Authenticate}, \code{Collect}, \code{Create} workflow. This function +is a wrapper for the Create*Network S3 methods. +} +\note{ +When creating twitter networks, a network with additional user information can be generated using the +\code{\link{GraphUserInfoTwitter}} function. Additional calls can be made to the twitter API to get information +about users that were identified as nodes during network creation. 
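As a sketch of that step, continuing from a twitter \code{Collect} result such as \code{myTwitterData} above (argument values are illustrative):

# for twitter data this returns a list containing a relations data frame,
# a users data frame and an igraph object
myActorNetwork <- Create(myTwitterData, type = "actor", writeToFile = FALSE, verbose = TRUE)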
+} +\seealso{ +\code{\link{CreateActorNetwork}}, \code{\link{CreateBimodalNetwork}}, \code{\link{CreateSemanticNetwork}} +} +\keyword{actor} +\keyword{bimodal} +\keyword{create} +\keyword{network} +\keyword{semantic} diff --git a/vosonSML/man/CreateActorNetwork.Rd b/vosonSML/man/CreateActorNetwork.Rd new file mode 100644 index 0000000..2ea8e55 --- /dev/null +++ b/vosonSML/man/CreateActorNetwork.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CreateActorNetwork.R, +% R/CreateActorNetwork.reddit.R, R/CreateActorNetwork.twitter.R, +% R/CreateActorNetwork.youtube.R +\name{CreateActorNetwork} +\alias{CreateActorNetwork} +\alias{CreateActorNetwork.default} +\alias{CreateActorNetwork.reddit} +\alias{CreateActorNetwork.twitter} +\alias{CreateActorNetwork.youtube} +\title{Create an actor network from social media data} +\usage{ +CreateActorNetwork(x, ...) + +\method{CreateActorNetwork}{default}(x, ...) + +\method{CreateActorNetwork}{reddit}(x, weightEdges = FALSE, + textData = FALSE, cleanText = TRUE, writeToFile = FALSE, ...) + +\method{CreateActorNetwork}{twitter}(x, writeToFile = FALSE, + verbose = FALSE, ...) + +\method{CreateActorNetwork}{youtube}(x, writeToFile = FALSE, ...) +} +\arguments{ +\item{x}{Collected social media data with \code{social media} class attribute.} + +\item{...}{Additional parameters to pass to the network creation method.} + +\item{weightEdges}{Logical. Combines and weights directed network edges. Default is \code{FALSE}.} + +\item{textData}{Logical. If the igraph network should include the comment text as an edge attribute. +Cannot be used with the \code{weightEdges} parameter. Default is \code{FALSE}.} + +\item{cleanText}{Logical. If non-alphanumeric, non-punctuation, and non-space characters should be removed from the +included text attribute data. Only applies if \code{textData = TRUE}. Default is \code{TRUE}.} + +\item{writeToFile}{Logical. Save network data to a file in the current working directory. Default is \code{FALSE}.} + +\item{verbose}{Logical. Output additional information about the network creation. Default is \code{FALSE}.} +} +\value{ +A reddit actor network as igraph object. + +A twitter actor network as list containing a relations dataframe, users dataframe and igraph object. + +A youtube actor network as igraph object. +} +\description{ +This function creates an actor network from social media data collected using the \code{Collect} method. Edges in +the network represent interactions or relationships between the actors. For example, with twitter data an +interaction is defined as a 'mention', reply' or 'retweet' from user i to user j, given 'tweet' m. With youtube +comments, an interaction is defined as a 'reply' from user i to user j, given 'comment' m. The resulting network is +returned as an igraph object. +} +\note{ +For twitter data, actor networks can be created from multiple data frames (i.e. datasets collected individually +using \code{Collect} method. Simply create a list of the data frames that you wish to create a network from. 
+For example, \code{myList <- list(myTwitterData1, myTwitterData2, myTwitterData3)} +} +\seealso{ +\code{\link{Create}} +} +\keyword{actor} +\keyword{create} +\keyword{reddit} +\keyword{twitter} +\keyword{youtube} diff --git a/vosonSML/man/CreateActorNetwork.reddit.Rd b/vosonSML/man/CreateActorNetwork.reddit.Rd deleted file mode 100644 index 6633d69..0000000 --- a/vosonSML/man/CreateActorNetwork.reddit.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CreateActorNetwork.reddit.R -\name{CreateActorNetwork.reddit} -\alias{CreateActorNetwork.reddit} -\title{Creates a reddit actor network from collected threads} -\usage{ -\method{CreateActorNetwork}{reddit}(x, weightEdges, includeTextData, - cleanText, writeToFile) -} -\arguments{ -\item{x}{a dataframe as vosonSML class object containing collected social network data} - -\item{weightEdges}{logical. Combines and weights directed edges. Can't be used with includeTextData.} - -\item{includeTextData}{logical. If the igraph network edges should include the comment text as attribute.} - -\item{cleanText}{logical. If non-alphanumeric, non-punctuation, and non-space characters should be removed from the -included text attribute data. Default is TRUE} - -\item{writeToFile}{logical. If the igraph network graph should be written to file.} -} -\value{ -an igraph object of the actor network -} -\description{ -Uses RedditExtractoR::user_network to create an igraph directed actor network with comment ids as edge attribute. -} -\note{ -Can create three types of network graphs: -* Directed graph with subreddit, thread_ids and comment ids as edge attributes - default option -* Directed graph with weighted edges (without comment ids) - weightEdges = TRUE -* Directed graph with comment text included as edge attribute - includeTextData = TRUE - -Comment ids as edge attributes in graphs refer to the Collect dataframe comment id not reddits comment id -If "Forbidden control character 0x19 found in igraph_i_xml_escape, Invalid value" then set cleanText = TRUE -} diff --git a/vosonSML/man/CreateBimodalNetwork.Rd b/vosonSML/man/CreateBimodalNetwork.Rd index 80c9f15..4fda66e 100644 --- a/vosonSML/man/CreateBimodalNetwork.Rd +++ b/vosonSML/man/CreateBimodalNetwork.Rd @@ -2,108 +2,55 @@ % Please edit documentation in R/CreateBimodalNetwork.R \name{CreateBimodalNetwork} \alias{CreateBimodalNetwork} -\title{Note: this function is DEPRECATED and will be removed in a future release. -Please use the \code{Create} function} +\title{Create bimodal networks from social media data} \usage{ -CreateBimodalNetwork(x, writeToFile, removeTermsOrHashtags) +CreateBimodalNetwork(x, writeToFile, removeTermsOrHashtags, ...) } \arguments{ -\item{x}{a data frame of class \code{dataSource}. For Twitter data, it is -also possible to provide a *list* of data frames (i.e. data frames that -inherit class \code{dataSource} and \code{twitter}). Only lists of Twitter -data frames are supported at this time. If a list of data frames is -provided, then the function binds these row-wise and computes over the -entire data set.} +\item{x}{A data frame of class \code{dataSource}. For Twitter data, it is also possible to provide a *list* of data +frames (i.e. data frames that inherit class \code{dataSource} and \code{twitter}). Only lists of Twitter data +frames are supported at this time. If a list of data frames is provided, then the function binds these row-wise and +computes over the entire data set.} -\item{writeToFile}{logical. 
If \code{TRUE} then the network is saved to file -in current working directory (GRAPHML format), with filename denoting the -current date/time and the type of network.} +\item{writeToFile}{Logical. If \code{TRUE} then the network is saved to file in current working directory (GRAPHML +format), with filename denoting the current date/time and the type of network.} -\item{removeTermsOrHashtags}{character vector. Default is none. Otherwise -this argument specifies which terms or hashtags (i.e. vertices with matching -`name`) should be removed from the bimodal network. This is useful to remove -the search term or hashtag that was used to collect the data (i.e. remove -the corresponding vertex in the graph). For example, a value of "#auspol" -means that if there is a vertex with the exact name "#auspol" then this -vertex will be removed.} +\item{removeTermsOrHashtags}{Character string. Default is none. Otherwise this argument specifies which terms or +hashtags (i.e. vertices with matching 'name') should be removed from the bimodal network. This is useful to remove +the search term or hashtag that was used to collect the data (i.e. remove the corresponding vertex in the graph). +For example, a value of "#auspol" means that if there is a vertex with the exact name "#auspol" then this vertex +will be removed.} + +\item{...}{Additional parameters to pass to the network creation method.} } \value{ An igraph graph object, with weighted and directed edges. } \description{ -Create bimodal networks from social media data +This function creates a bimodal network from social media data (i.e. from data frames of class \code{dataSource}, or +for Twitter data it is also possible to provide a *list* of data frames), with edges representing relationships +between actors of two different types (e.g. Facebook users and Facebook posts, with edges representing whether a +user has commented or 'liked' a post). } \details{ -This function creates a bimodal network from social media data (i.e. from -data frames of class \code{dataSource}, or for Twitter data it is also -possible to provide a *list* of data frames), with edges representing -relationships between actors of two different types (e.g. Facebook users and -Facebook posts, with edges representing whether a user has commented or -'liked' a post). - -This function creates a (directed and weighted) bimodal network from a data -frame of class \code{dataSource} (which are created using the `CollectData` -family of functions in the vosonSML package), or a *list* of Twitter -data frames collected using \code{CollectDataTwitter} function. +This function creates a (directed and weighted) bimodal network from a data frame of class \code{dataSource} (which +are created using the 'CollectData' family of functions in the vosonSML package), or a *list* of Twitter data +frames collected using \code{CollectDataTwitter} function. -The resulting network is an igraph graph object. This graph object is -bimodal because edges represent relationships between vertices of two -different types. For example, in a bimodal Facebook network, vertices -represent Facebook users or Facebook posts, and edges represent whether a -user has commented or 'liked' a post. Edges are directed and weighted (e.g. -if user i has commented n times on post j, then the weight of this directed -edge equals n). +The resulting network is an igraph graph object. This graph object is bimodal because edges represent relationships +between vertices of two different types. 
For example, in a bimodal Facebook network, vertices represent Facebook
+users or Facebook posts, and edges represent whether a user has commented or 'liked' a post. Edges are directed and
+weighted (e.g. if user i has commented n times on post j, then the weight of this directed edge equals n).
 }
 \note{
-Not all data sources in vosonSML can be used for creating
-bimodal networks.
-
-Currently supported data sources are:
-
-- Facebook
-- Twitter
-
-Other data sources (e.g. YouTube) will be implemented in the future.
-Additionally, the user is notified if they try to create bimodal networks
-for incompatible data sources.
-
-For Twitter data, bimodal networks can be created from multiple data frames
-(i.e. datasets collected individually using CollectDataTwitter). Simply
-create a list of the data frames that you wish to create a network from. For
-example, \code{myList <- list(myTwitterData1, myTwitterData2,
-myTwitterData3)}.
-}
-\examples{
-
-\dontrun{
-  ## This example shows how to collect Facebook page data and create a bimodal network
-
-  # Use your own values for myAppID and myAppSecret
-  myAppID <- "123456789098765"
-  myAppSecret <- "abc123abc123abc123abc123abc123ab"
-
-  # Authenticate with the Facebook API using `AuthenticateWithFacebookAPI`
-  fb_oauth <- AuthenticateWithFacebookAPI(appID=myAppID, appSecret=myAppSecret,
-  extended_permissions=FALSE, useCachedToken=TRUE)
-
-  # Run the `CollectDataFacebook` function and store the results in variable `myFacebookData`
-  myFacebookData <- CollectDataFacebook(pageName="StarWars", rangeFrom="2014-05-15",
-  rangeTo="2014-06-03",writeToFile=FALSE,verbose=TRUE)
-
-  # Create a 'bimodal' network using \\code{CreateBimodalNetwork}
-  g_bimodal_facebook <- CreateBimodalNetwork(myFacebookData)
-
-  # View descriptive information about the bimodal network
-  g_bimodal_facebook
-}
+Supported data sources: \code{facebook}, \code{twitter}.
+For Twitter data, bimodal networks can be created from multiple data frames (i.e. datasets collected individually
+using CollectDataTwitter). Simply create a list of the data frames that you wish to create a network from. For
+example, \code{myList <- list(myTwitterData1, myTwitterData2, myTwitterData3)}.
 }
 \seealso{
-See \code{CollectDataFacebook} and \code{CollectDataTwitter} to
-collect data for creating bimodal networks in vosonSML.
-}
-\author{
-Timothy Graham & Robert Ackland
-
+\code{CollectDataFacebook}, \code{CollectDataTwitter}
 }
 \keyword{SNA}
 \keyword{bimodal}
diff --git a/vosonSML/man/CreateSemanticNetwork.Rd b/vosonSML/man/CreateSemanticNetwork.Rd
index 83a1434..0908b3f 100644
--- a/vosonSML/man/CreateSemanticNetwork.Rd
+++ b/vosonSML/man/CreateSemanticNetwork.Rd
@@ -2,127 +2,73 @@
 % Please edit documentation in R/CreateSemanticNetwork.R
 \name{CreateSemanticNetwork}
 \alias{CreateSemanticNetwork}
-\title{Note: this function is DEPRECATED and will be removed in a future release.
-Please use the \code{Create} function}
+\title{Creates a semantic network from social media data (semantic relationships between concepts)}
 \usage{
 CreateSemanticNetwork(x, writeToFile, termFreq, hashtagFreq,
-  removeTermsOrHashtags, stopwordsEnglish)
+  removeTermsOrHashtags, stopwordsEnglish, ...)
 }
 \arguments{
-\item{x}{a data frame of class \code{dataSource}. For Twitter data, it is
-also possible to provide a *list* of data frames (i.e. data frames that
-inherit class \code{dataSource} and \code{twitter}). Only lists of Twitter
-data frames are supported at this time. If a list of data frames is
-provided, then the function binds these row-wise and computes over the
-entire data set.}
+\item{x}{A data frame of class \code{dataSource}. For Twitter data, it is also possible to provide a *list* of data
+frames (i.e. data frames that inherit class \code{dataSource} and \code{twitter}). Only lists of Twitter data
+frames are supported at this time. If a list of data frames is provided, then the function binds these row-wise and
+computes over the entire data set.}

-\item{writeToFile}{logical. If \code{TRUE} then the network is saved to file
-in current working directory (GRAPHML format), with filename denoting the
-current date/time and the type of network.}
+\item{writeToFile}{Logical. If \code{TRUE} then the network is saved to file in current working directory (GRAPHML
+format), with filename denoting the current date/time and the type of network.}

-\item{termFreq}{numeric integer, specifying the percentage of most frequent
-TERMS to include. For example, a value of 20 means that the 20 percent most
-frequently occurring terms will be included in the semantic network. The
+\item{termFreq}{Numeric integer. Specifies the percentage of most frequent TERMS to include. For example, a value
+of 20 means that the 20 percent most frequently occurring terms will be included in the semantic network. The
 default value is 5, meaning the 5 percent most frequent terms are used.}

 \item{hashtagFreq}{** NOT IMPLEMENTED YET - DEFAULTS TO ALL HASHTAGS **.
-numeric integer, specifying the percentage of most frequent HASHTAGS to
-include. For example, a value of 80 means that the 80 percent most frequently
-occurring hashtags will be included in the semantic network. The default
-value is 50, meaning the 50 percent most frequent hashtags are used.}
+Numeric integer. Specifies the percentage of most frequent HASHTAGS to include. For example, a value of 80 means
+that the 80 percent most frequently occurring hashtags will be included in the semantic network. The default value
+is 50, meaning the 50 percent most frequent hashtags are used.}

-\item{removeTermsOrHashtags}{character vector. Default is none. Otherwise
-this argument specifies which terms or hashtags (i.e. vertices with matching
-`name`) should be removed from the semantic network. This is useful to
-remove the search term or hashtag that was used to collect the data (i.e.
-remove the corresponding vertex in the graph). For example, a value of
-"#auspol" means that if there is a vertex with the name "#auspol" then this
-vertex will be removed.}
+\item{removeTermsOrHashtags}{Character string vector. Default is none. Otherwise this argument specifies which terms
+or hashtags (i.e. vertices with matching 'name') should be removed from the semantic network. This is useful to
+remove the search term or hashtag that was used to collect the data (i.e. remove the corresponding vertex in the
+graph). For example, a value of "#auspol" means that if there is a vertex with the name "#auspol" then this vertex
+will be removed.}

-\item{stopwordsEnglish}{logical. If \code{TRUE} then English stopwords are
-removed from the tweets (e.g. words such as 'the' or 'and'). Using
-\code{FALSE} may be helpful non-English data sets. The default is
-\code{TRUE} (i.e. stopwords will be removed).}
+\item{stopwordsEnglish}{Logical. If \code{TRUE} then English stopwords are removed from the tweets (e.g. words such
+as 'the' or 'and'). Using \code{FALSE} may be helpful for non-English data sets. The default is \code{TRUE} (i.e.
+stopwords will be removed).} + +\item{...}{Additional parameters to pass to the network creation method.} } \value{ An igraph graph object, with weighted edges. } \description{ -Create semantic networks from social media data (semantic relationships -between concepts) +This function creates a semantic network from social media data (i.e. from data frames of class \code{dataSource}, +or for Twitter data it is also possible to provide a list of data frames). In such semantic networks, concepts are +words/terms extracted from the text corpus of social media data (e.g. tweets on Twitter). } \details{ -This function creates a semantic network from social media data (i.e. from -data frames of class \code{dataSource}, or for Twitter data it is also -possible to provide a list of data frames). In such semantic networks, -concepts are words/terms extracted from the text corpus of social media data -(e.g. tweets on Twitter). - -This function creates a weighted network from a data frame of class -\code{dataSource} (which are created using the `CollectData` family of -functions in the vosonSML package), or a list of Twitter data frames -collected using \code{CollectDataTwitter} function. - -The resulting semantic network is an igraph graph object. This graph object -is semantic because vertices represent unique concepts (in this case unique -terms/words extracted from a social media text corpus), and edges represent -the co-occurrence of terms for all observations in the data set. For -example, for a Twitter semantic network, vertices represent either hashtags -(e.g. "#auspol") or single terms ("politics"). If there are 1500 tweets in -the data set (i.e. 1500 observations), and the term "#auspol" and the term -"politics" appear together in every tweet, then this will be represented by -an edge with weight equal to 1500. +This function creates a weighted network from a data frame of class \code{dataSource} (which are created using the +'CollectData' family of functions in the vosonSML package), or a list of Twitter data frames collected using +\code{CollectDataTwitter} function. + +The resulting semantic network is an igraph graph object. This graph object is semantic because vertices represent +unique concepts (in this case unique terms/words extracted from a social media text corpus), and edges represent +the co-occurrence of terms for all observations in the data set. For example, for a Twitter semantic network, +vertices represent either hashtags (e.g. "#auspol") or single terms ("politics"). If there are 1500 tweets in the +data set (i.e. 1500 observations), and the term "#auspol" and the term "politics" appear together in every tweet, +then this will be represented by an edge with weight equal to 1500. } \note{ -Not all data sources in vosonSML can be used for creating -semantic networks. - -Currently supported data sources are: - -- Twitter - -Other data sources (e.g. YouTube and Facebook) will be implemented in the -future. Additionally, the user is notified if they try to create semantic -networks for incompatible data sources. - -For Twitter data, semantic networks can be created from multiple data frames -(i.e. datasets collected individually using CollectDataTwitter). Simply -create a list of the data frames that you wish to create a network from. For -example, \code{myList <- list(myTwitterData1, myTwitterData2, -myTwitterData3)}. 
-} -\examples{ - -\dontrun{ - ## This example shows how to collect Twitter data and create a semantic network - - # Firstly specify your API credentials - my_api_key <- "1234567890qwerty" - my_api_secret <- "1234567890qwerty" - my_access_token <- "1234567890qwerty" - my_access_token_secret <- "1234567890qwerty" - - # Authenticate with the Twitter API using \\code{AuthenticateWithTwitterAPI} - AuthenticateWithTwitterAPI(api_key=my_api_key, api_secret=my_api_secret, - access_token=my_access_token, access_token_secret=my_access_token_secret) - - # Collect tweets data using \\code{myTwitterData} - myTwitterData <- CollectDataTwitter(searchTerm="#auspol", - numTweets=200,writeToFile=FALSE,verbose=FALSE) - - # Create a 'semantic' network using \\code{CreateSemanticNetwork} - g_semantic_twitter <- CreateSemanticNetwork(myTwitterData,writeToFile=FALSE, - termFreq=20,hashtagFreq=80) +Currently supported data sources: +\itemize{ + \item \code{twitter} } +For Twitter data, semantic networks can be created from multiple data frames (i.e. datasets collected individually +using CollectDataTwitter). Simply create a list of the data frames that you wish to create a network from. For +example, \code{myList <- list(myTwitterData1, myTwitterData2, myTwitterData3)}. } \seealso{ -See \code{CollectDataTwitter} to collect data for creating semantic -networks in vosonSML. -} -\author{ -Timothy Graham & Robert Ackland - +\code{CollectDataTwitter} } \keyword{SNA} \keyword{igraph} diff --git a/vosonSML/man/GetYoutubeVideoIDs.Rd b/vosonSML/man/GetYoutubeVideoIDs.Rd index df14bd6..5184b4c 100644 --- a/vosonSML/man/GetYoutubeVideoIDs.Rd +++ b/vosonSML/man/GetYoutubeVideoIDs.Rd @@ -2,64 +2,55 @@ % Please edit documentation in R/GetYoutubeVideoIDs.R \name{GetYoutubeVideoIDs} \alias{GetYoutubeVideoIDs} -\title{Extract/scrape the IDs from a set of YouTube video URLs} +\title{Extract the IDs from a set of YouTube video URLs} \usage{ GetYoutubeVideoIDs(file) } \arguments{ -\item{file}{The connection to read from. This can be a local file, or a http -or ftp connection. It can also be a character string with the file name or -URI. The file must be plain text format with the URL of each YouTube video -specified on a new line (separated by character return). For example, the -first line might contain https://www.youtube.com/watch?v=73I5dRucCds, and -the second line might contain https://www.youtube.com/watch?v=6S9r_YbqHy8.} +\item{file}{The connection to read from. This can be a local file, or a http or ftp connection. It can also be a +character string with the file name or URI. The file must be plain text format with the URL of each YouTube video +specified on a new line (separated by character return). For example, the first line might contain +https://www.youtube.com/watch?v=73I5dRucCds, and the second line might contain +https://www.youtube.com/watch?v=6S9r_YbqHy8.} } \value{ -a character vector representing a set of YouTube video IDs, each -with number of characters equal to 11 (e.g. "73I5dRucCds"). +a character vector representing a set of YouTube video IDs, each with number of characters equal to 11 +(e.g. "73I5dRucCds"). } \description{ -This function reads a list of YouTube video URLs from a text file and -converts them to a vector object. For example, -"https://www.youtube.com/watch?v=73I5dRucCds" has the ID "73I5dRucCds". 
This
-function can be used to create an object for the argument \code{videoIDs} in
-the function \code{CollectDataYoutube}, that is, by extracting the IDs for a
-set of YouTube videos and compiling them into a vector, ready for collecting
-data with \code{CollectDataYoutube}.
+This function reads a list of YouTube video URLs from a text file and converts them to a vector object. For example,
+"https://www.youtube.com/watch?v=73I5dRucCds" has the ID "73I5dRucCds". This function can be used to create an
+object for the argument \code{videoIDs} in the function \code{CollectDataYoutube}, that is, by extracting the IDs
+for a set of YouTube videos and compiling them into a vector, ready for collecting data with
+\code{CollectDataYoutube}.
 }
 \note{
-This function is useful for lots of videos. However, many videos may
-take a *long* time to collect data from. In such cases it is recommended to
-use the \code{verbose=TRUE} argument for the function
-\code{CollectDataYoutube}, in order to keep track of progress during
-computation.
+This function is useful when working with many videos. However, many videos may take a *long* time to collect
+data from. In such cases it is recommended to use the \code{verbose = TRUE} argument for the function
+\code{CollectDataYoutube}, in order to keep track of progress during computation.
 }
 \examples{
-
 \dontrun{
-  ## This example shows how to use `GetYoutubeVideoIDs` to extract video IDs from YouTube
-  ## video URLs, and then collect data using the function `CollectDataYoutube`
+# this example shows how to use 'GetYoutubeVideoIDs' to extract video IDs from YouTube video
+# URLs, and then collect data using the function 'CollectDataYoutube'

-  # Use your own Google Developer API Key here:
-  myApiKey <- "1234567890"
+# set your Google Developer API key
+myYtApiKey <- "xxxxxxxxxx"

-  # Authenticate with the Google API
-  apiKeyYoutube <- AuthenticateWithYoutubeAPI(apiKeyYoutube=myApiKey)
+# authenticate with the Google API
+apiKeyYoutube <- AuthenticateWithYoutubeAPI(apiKeyYoutube = myYtApiKey)

-  # Use the function `GetYoutubeVideoIDs` to automatically generate vector of IDs from
-  # a plain text file of video URLs
-  videoIDs <- GetYoutubeVideoIDs(file="youtube_to_scrape.txt")
+# use the function 'GetYoutubeVideoIDs' to automatically generate vector of IDs from a plain
+# text file of video URLs
+videoIDs <- GetYoutubeVideoIDs(file = "youtube_urls_to_scrape.txt")

-  # Collect the data using function `CollectDataYoutube`
-  myYoutubeData <- CollectDataYoutube(videoIDs,apiKeyYoutube,writeToFile=FALSE)
+# collect the data using function 'CollectDataYoutube'
+myYoutubeData <- CollectDataYoutube(videoIDs, apiKeyYoutube, writeToFile = FALSE)
 }
+
 }
 \seealso{
-Use \code{CollectDataYoutube} for collecting YouTube comments data.
-}
-\author{
-Timothy Graham & Robert Ackland
-
+\code{CollectDataYoutube}
 }
 \keyword{scraping}
 \keyword{vosonSML}
diff --git a/vosonSML/man/GraphUserInfoTwitter.Rd b/vosonSML/man/GraphUserInfoTwitter.Rd
new file mode 100644
index 0000000..c0a0ad7
--- /dev/null
+++ b/vosonSML/man/GraphUserInfoTwitter.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/GraphUserInfoTwitter.R
+\name{GraphUserInfoTwitter}
+\alias{GraphUserInfoTwitter}
+\title{Create twitter network graph with user information attributes}
+\usage{
+GraphUserInfoTwitter(df_collect, df_relations, df_users,
+  lookup_missing_users = TRUE, twitter_token = NULL,
+  writeToFile = FALSE)
+}
+\arguments{
+\item{df_collect}{A data frame containing the collected tweet data from \code{Collect}.}
+
+\item{df_relations}{A data frame containing the network relations data from \code{Create}.}
+
+\item{df_users}{A data frame containing the network users data from \code{Create}.}
+
+\item{lookup_missing_users}{Logical. Request user information for any users missing from \code{df_collect}. Default
+is \code{TRUE}.}
+
+\item{twitter_token}{A twitter authentication token from \code{Authenticate}.}
+
+\item{writeToFile}{Logical. If \code{TRUE} a data frame of user information and the resulting network graph will
+be saved to file. Default is \code{FALSE}.}
+}
+\value{
+A list containing a data frame with user information and an igraph object of the twitter network with
+user node attributes.
+}
+\description{
+Creates a network from the relations and users data frames generated by \code{Create}. The network is supplemented
+with additional downloaded user information applied as node attributes.
+}
+\note{
+Only supports the twitter actor network at this time. Bimodal network support will require the filtering
+of twitter user ids from nodes of other types.
+}
diff --git a/vosonSML/man/PopulateUserInfo.Rd b/vosonSML/man/PopulateUserInfo.Rd
deleted file mode 100644
index 9635e76..0000000
--- a/vosonSML/man/PopulateUserInfo.Rd
+++ /dev/null
@@ -1,37 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/PopulateUserInfo.R
-\name{PopulateUserInfo}
-\alias{PopulateUserInfo}
-\title{Populate Twitter networks with user information}
-\usage{
-PopulateUserInfo(networkObject)
-}
-\arguments{
-\item{networkObject}{an igraph graph object created with \code{\link{Create}}}
-}
-\value{
-An igraph graph object
-}
-\description{
-This function is used to 'populate' Twitter networks (generated
-with the \code{\link{Create}} function) with information about
-the users in the network. This involves calls to the Twitter API
-to collect this information, which is then applied to the network
-as vertex attributes.
-} -\examples{ - -\dontrun{ -require(magrittr) -## Get Twitter user information and apply to network -myTwitterNetwork_userInfo <- PopulateUserInfo(myTwitterNetwork) - -} -} -\seealso{ -\code{\link{Collect}}, \code{\link{Create}} -} -\author{ -Timothy Graham & Robert Ackland - -} diff --git a/vosonSML/man/SaveCredential.Rd b/vosonSML/man/SaveCredential.Rd index 1dbfb61..beaa8a1 100644 --- a/vosonSML/man/SaveCredential.Rd +++ b/vosonSML/man/SaveCredential.Rd @@ -1,47 +1,45 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Authenticate.R +% Please edit documentation in R/SaveCredential.R \name{SaveCredential} \alias{SaveCredential} \alias{LoadCredential} \title{Save and load credential information} \usage{ -SaveCredential(credential, filename = "credential.RDS") +SaveCredential(credential, filename) -LoadCredential(filename = "credential.RDS") +LoadCredential(filename) } \arguments{ -\item{credential}{\code{credential} object} +\item{credential}{A \code{credential} object.} -\item{filename}{character, filename to be saved to or restored from} +\item{filename}{Character string. Filename to be saved to or restored from. Default value is \code{credential.RDS}.} } \value{ -\code{credential} object +A \code{credential} object. } \description{ -Functions to save and load credential information. Currently, credential -information will be stored as a RDS file. \code{SaveCredential} will return -the input \code{credential}, useful for working as a filter between the +Functions to save and load credential information. Currently, credential information will be stored as a RDS file. +\code{SaveCredential} will return the input \code{credential}, useful for working as a filter between \code{Authenticate} and \code{Collect}. } -\note{ -\code{credential} created from \code{Authenticate} with socialmedia = -'twitter' will not be saved by SaveCredential -} \examples{ - \dontrun{ require(magrittr) -myAppID <- "123456789098765" -myAppSecret <- "abc123abc123abc123abc123abc123ab" -myUsernames <- c("senjohnmccain","obama") - -Authenticate("instagram", -appID = myAppId, -appSecret = myAppSecret) \%>\% SaveCredential("instagramCred.RDS") \%>\% Collect(ego = TRUE, -username = myUsernames) \%>\% Create - -## Load the previously saved credential information -LoadCredential("instagramCred.RDS") \%>\% Collect(tag="obama", -distance=5000, n=100) \%>\% Create("bimodal") + +## save credential example + +myIgAppID <- "xxxxxxxxxxx" +myIgAppSecret <- "xxxxxxxxxxxxxxxxxxxxxx" +listIgUsernames <- c("senjohnmccain", "obama") + +Authenticate("instagram", appID = myIgAppID, appSecret = myIgAppSecret) \%>\% + SaveCredential("instagramCred.RDS") \%>\% + Collect(ego = TRUE, username = listIgUsernames) \%>\% Create() + +## load previously saved credential example + +LoadCredential("instagramCred.RDS") \%>\% + Collect(tag = "obama", distance = 5000, n = 100) \%>\% Create("bimodal") } + } diff --git a/vosonSML/man/importData.Rd b/vosonSML/man/importData.Rd index 4207def..a21d62a 100644 --- a/vosonSML/man/importData.Rd +++ b/vosonSML/man/importData.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/importData.R -\name{importData} -\alias{importData} +% Please edit documentation in R/ImportData.R +\name{ImportData} +\alias{ImportData} \title{Import vosonSML data previously saved to disk using the `Collect()` function.} \usage{ -importData(file, dataSource) +ImportData(file, dataSource) } \arguments{ \item{file}{character, specifying the file path to the data 
to be imported} @@ -38,7 +38,7 @@ myFacebookData <- Authenticate("Facebook", appID = appID, appSecret = appSecret) rangeTo="2015-03-02", writeToFile=TRUE) # Import the data (that was saved to disk in the previous step) -myStarWarsData <- importData("2015-03-01_to_2015-03-02_StarWars_FacebookData.csv","facebook") +myStarWarsData <- ImportData("2015-03-01_to_2015-03-02_StarWars_FacebookData.csv","facebook") # Create a network using the imported dataframe object myNetwork <- myStarWarsData \%>\% Create("Bimodal") diff --git a/vosonSML/man/vosonSML-package.Rd b/vosonSML/man/vosonSML-package.Rd index 2cd30f6..250003e 100644 --- a/vosonSML/man/vosonSML-package.Rd +++ b/vosonSML/man/vosonSML-package.Rd @@ -6,22 +6,22 @@ \alias{vosonSML} \title{Collection and network analysis of social media data} \description{ -The goal of the vosonSML package is to provide a suite of easy-to-use tools for collecting data from social media -sources (Instagram, Facebook, Twitter, Youtube, and Reddit) and generating different types of networks suited to -Social Network Analysis (SNA) and text analytics. It offers tools to create unimodal, multimodal, semantic, and -dynamic networks. It draws on excellent packages such as \pkg{twitteR}, \pkg{instaR}, \pkg{Rfacebook}, -\pkg{RedditExtractoR} and \pkg{igraph} in order to provide an integrated 'work flow' for collecting different types -of social media data and creating different types of networks out of these data. Creating networks from social media +The goal of the vosonSML package is to provide a suite of easy-to-use tools for collecting data from social media +sources (Instagram, Facebook, Twitter, Youtube, and Reddit) and generating different types of networks suited to +Social Network Analysis (SNA) and text analytics. It offers tools to create unimodal, multimodal, semantic, and +dynamic networks. It draws on excellent packages such as \pkg{rtweet}, \pkg{instaR}, \pkg{Rfacebook}, +\pkg{RedditExtractoR} and \pkg{igraph} in order to provide an integrated 'work flow' for collecting different types +of social media data and creating different types of networks out of these data. Creating networks from social media data is often non-trivial and time consuming. This package simplifies such tasks so users can focus on analysis. } \details{ -vosonSML uses a straightforward S3 class system. Data collected with this package produces \code{data.table} objects -(extension of class \code{data.frame}), which are assigned the class \code{dataSource}. Additionally, -\code{dataSource} objects are assigned a class identifying the source of data, e.g. \code{facebook} or \code{youtube} -. In this way, \code{dataSource} objects are fast, easy to work with, and can be used as input to easily construct -different types of networks. For example, the function \code{\link{Collect}} can be used to collect Twitter data, -which is then 'piped' to the \code{\link{Create}} function, resulting in a network (an igraph object) that is ready -for analysis. +vosonSML uses a straightforward S3 class system. Data collected with this package produces \code{data.table} objects +(extension of class \code{data.frame}), which are assigned the class \code{dataSource}. Additionally, +\code{dataSource} objects are assigned a class identifying the source of data, e.g. \code{facebook} or +\code{youtube}. In this way, \code{dataSource} objects are fast, easy to work with, and can be used as input to +easily construct different types of networks. 
For example, the function \code{Collect} can be used to collect +Twitter data, which is then 'piped' to the \code{Create} function, resulting in a network (an igraph object) +that is ready for analysis. } \author{ Created by Timothy Graham and Robert Ackland, with major contributions by Chung-hong Chan and Bryan Gertzel. diff --git a/vosonSML/tests/testthat.R b/vosonSML/tests/testthat.R deleted file mode 100644 index d752e69..0000000 --- a/vosonSML/tests/testthat.R +++ /dev/null @@ -1,4 +0,0 @@ -# library(testthat) -# library(vosonSML) - -# test_check("vosonSML") diff --git a/vosonSML/tests/testthat/cred_empty.R b/vosonSML/tests/testthat/cred_empty.R deleted file mode 100644 index e541683..0000000 --- a/vosonSML/tests/testthat/cred_empty.R +++ /dev/null @@ -1,5 +0,0 @@ -### please modify me and rename me to cred.R -### WARNING: don't add cred.R to github repo -### cred.R already in .gitignore - -yt <- "" diff --git a/vosonSML/tests/testthat/test_youtube.R b/vosonSML/tests/testthat/test_youtube.R deleted file mode 100644 index 43d817b..0000000 --- a/vosonSML/tests/testthat/test_youtube.R +++ /dev/null @@ -1,11 +0,0 @@ -source("cred.R") -require(magrittr) - -## "4_hHKlEZ9Go" is a closed comment video - -test_that("Youtube Empty Comment Error",{ - expect_error(Authenticate("youtube", yt) %>% Collect(videoIDs = c("4_hHKlEZ9Go")), "No comment can be collected from the given videoIDs.") -### however, multiple videoIDs with only one with empty comment should not throw an error. - borat <- Authenticate("youtube", yt) %>% Collect(videoIDs = c("4_hHKlEZ9Go", "YzdYF0r3gB4")) - expect_true("dataSource" %in% class(borat)) -})
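The man pages above describe the reworked \code{rtweet}-based workflow but no longer carry a worked Twitter example,
so a minimal sketch is included below. Only the \code{GraphUserInfoTwitter()} signature is taken from the new man
page; the argument names passed through \code{Authenticate()} and \code{Collect()}, the \code{"actor"} network type
string, and the \code{$relations}/\code{$users} elements of the \code{Create()} output are illustrative assumptions
rather than confirmed API.

library(magrittr)
library(vosonSML)

# create a twitter credential via rtweet (argument names assumed for illustration)
myTwitterAuth <- Authenticate("twitter",
                              appName = "vosonSML",
                              apiKey = "xxxxxxxxxx", apiSecret = "xxxxxxxxxx",
                              accessToken = "xxxxxxxxxx", accessTokenSecret = "xxxxxxxxxx")

# collect tweets and build an actor network (search arguments assumed)
myTwitterData <- myTwitterAuth %>% Collect(searchTerm = "#auspol", numTweets = 200)
myActorNetwork <- myTwitterData %>% Create("actor")

# supplement the actor network with downloaded user information;
# assumes the Create() output exposes the relations and users data frames used below
myResult <- GraphUserInfoTwitter(df_collect = myTwitterData,
                                 df_relations = myActorNetwork$relations,
                                 df_users = myActorNetwork$users,
                                 lookup_missing_users = FALSE,
                                 writeToFile = FALSE)

# per the man page, myResult is a list holding a data frame of user information
# and an igraph graph of the twitter network carrying user node attributes

If \code{lookup_missing_users = TRUE} were used instead, the credential obtained from \code{Authenticate()} would
also be supplied via the \code{twitter_token} argument.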