cyclestreets · mem48 · Sep 8, 2023 · Sep 8, 2023 · Sep 10, 2023 · Sep 10, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -39,7 +39,6 @@ Imports:
     magrittr,
     progressr,
     RcppSimdJson,
-    readr,
     sf,
     stringr,
     stringi

diff --git a/R/batch_read.R b/R/batch_read.R
@@ -12,14 +12,15 @@ batch_read = function(
     cols_to_keep = c(
       "name", # not used currently but could be handy
       "distances",
-      "gradient_smooth",
+      "elevations",
       "quietness"
     )
     ) {
   message("Reading in the following file:\n", file)
-  res = readr::read_csv(file, show_col_types = FALSE)
-  res$route_number = seq(nrow(res))
-  n_char = nchar(res$json)
+
+  res = data.table::fread(file, select = "json")
+  res = stringi::stri_replace_all_fixed(res$json, '""', '"', vectorize_all = FALSE)
+  n_char = nchar(res)
   n_char[is.na(n_char)] = 0
   if(all(n_char == 0)) {
     stop("No routes returned: does CycleStreets operate where you requested data?")
@@ -28,11 +29,11 @@ batch_read = function(
   if(min_nchar == 0) {
     which_min_ncar = which(n_char == 0)
     message("Removing NA routes: ", paste(which_min_ncar, collapse = " "))
-    res = res[-which_min_ncar, ]
+    res = res[-which_min_ncar]
   }
 
-  res_df = json2sf_cs(results_raw = res$json,
-                       id = res$route_number,
+  res = json2sf_cs(results_raw = res,
+                       id = which(n_char > 0), # original row positions of the kept routes, so route_number still matches the input file after empty routes are dropped
                        segments = segments,
                      cols_to_keep = cols_to_keep
                      )
@@ -52,33 +53,33 @@ batch_read = function(
     }
 
     for(i in seq(1, length(nms))){
-      if(nms[i] %in% names(res_df$routes)){
-        res_df$routes[[nms[i]]] = as.numeric(res_df$routes[[nms[i]]])
+      if(nms[i] %in% names(res$routes)){
+        res$routes[[nms[i]]] = as.numeric(res$routes[[nms[i]]])
       }
     }
-    names(res_df$routes)[names(res_df$routes) == "id"] = "route_number"
+    names(res$routes)[names(res$routes) == "id"] = "route_number"
 
     for(i in seq(1, length(nms))){
-      if(nms[i] %in% names(res_df$segments)){
-        res_df$segments[[nms[i]]] = as.numeric(res_df$segments[[nms[i]]])
+      if(nms[i] %in% names(res$segments)){
+        res$segments[[nms[i]]] = as.numeric(res$segments[[nms[i]]])
       }
     }
-    names(res_df$segments)[names(res_df$segments) == "id"] = "route_number"
+    names(res$segments)[names(res$segments) == "id"] = "route_number"
 
   } else {
 
     for(i in seq(1, length(nms))){
-      if(nms[i] %in% names(res_df)){
-        res_df[[nms[i]]] = as.numeric(res_df[[nms[i]]])
+      if(nms[i] %in% names(res)){
+        res[[nms[i]]] = as.numeric(res[[nms[i]]])
       }
     }
 
-    names(res_df)[names(res_df) == "id"] = "route_number"
+    names(res)[names(res) == "id"] = "route_number"
 
 
   }
 
-  res_df
+  res
 
 }
 

diff --git a/R/json2sf_cs.R b/R/json2sf_cs.R
@@ -61,8 +61,7 @@ json2sf_cs = function(
                      "start_latitude", "finish_longitude", "finish_latitude", "crow_fly_distance",
                      "event", "whence", "speed", "itinerary", "plan", "note", "length",
                      "west", "south", "east", "north", "leaving", "arriving", "grammesCO2saved",
-                     "calories", "edition", "gradient_segment", "elevation_change",
-                     "gradient_smooth")
+                     "calories", "edition")
 ){
 
   # Support both
@@ -79,7 +78,6 @@ json2sf_cs = function(
   }
 
   # browser()
-  results = RcppSimdJson::fparse(results_raw, query = "/marker", query_error_ok = TRUE, always_list = TRUE)
   results_error = RcppSimdJson::fparse(results_raw, query = "/error", query_error_ok = TRUE, always_list = TRUE)
   results_error = unlist(results_error, use.names = FALSE)
   if(length(results_error) > 0){
@@ -90,14 +88,21 @@ json2sf_cs = function(
       message(results_error$Freq[msgs],'x messages: "',results_error$results_error[msgs],'"\n')
     }
   }
-
+  results = RcppSimdJson::fparse(results_raw, query = "/marker", query_error_ok = TRUE, always_list = TRUE)
+  #rm(results_raw)
   # Process Marker
   results = lapply(results, `[[`, "@attributes")
   if(!is.null(id)){
     names(results) = as.character(id)
   }
-  # TODO: subset to keep only columns of relevance
-  results = lapply(results, data.table::rbindlist, fill = TRUE)
+
+  cols_to_keep2 = unique(c(cols_to_keep,"type","start","points","coordinates", "distances","elevations"))
+
+  results = lapply(results, function(x){
+    x = lapply(x, function(y){y[names(y) %in% cols_to_keep2]})
+    data.table::rbindlist(x, fill = TRUE)
+  })
+
   results = data.table::rbindlist(results, idcol = "id", fill = TRUE)
   if(nrow(results) == 0){
     stop("No valid results returned")
@@ -141,6 +146,7 @@ cleanup_results <- function(x, cols_to_keep){
   x = add_columns(x)
   x = sf::st_as_sf(x)
   x$SPECIALIDFORINTERNAL2 <- NULL
-  cols = cols_to_keep %in% names(x)
-  x[cols_to_keep]
+  cols_to_keep3 = unique(c(cols_to_keep,"gradient_segment","elevation_change","gradient_smooth"))
+  cols = cols_to_keep3[cols_to_keep3 %in% names(x)] # keep only requested names that exist in x; a bare logical mask over cols_to_keep3 would index x's columns by position
+  x[cols] # select by name
 }
diff --git a/R/utils.R b/R/utils.R
@@ -44,10 +44,10 @@ route_rolling_average = function(x, n = 3) {
 
 
 get_values = function(v, fun) {
-  sapply(v, function(x) fun(as.numeric(x)))
+  vapply(v, function(x) fun(as.numeric(x)), FUN.VALUE = numeric(1)) # one numeric summary per element of v; errors if fun returns anything else
 }
 
-extract_values = function(x) stringr::str_split(x, pattern = ",")
+extract_values = function(x) stringi::stri_split_fixed(x, pattern = ",")
 get_mean = function(v) get_values(v, fun = mean)
 get_sum = function(v) get_values(v, fun = sum)
 get_min = function(v) get_values(v, fun = min)

diff --git a/man/batch.Rd b/man/batch.Rd
diff --git a/man/json2sf_cs.Rd b/man/json2sf_cs.Rd