-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathall_metadata.R
60 lines (56 loc) · 1.95 KB
/
all_metadata.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#' This function downloads the metadata for all projects.
#'
#' Download the metadata from all the projects. This can be useful for finding
#' samples of interests across all projects.
#'
#' @param subset Either `sra`, `gtex` or `tcga`. Specifies
#' which metadata file to download.
#' @param verbose If `TRUE` it will print a message of where the file is
#' being downloaded to.
#'
#' @return A [DataFrame-class][S4Vectors::DataFrame-class] object with the phenotype
#' metadata.
#'
#' @author Leonardo Collado-Torres
#' @export
#'
#' @import downloader
#'
#' @examples
#'
#' metadata <- all_metadata()
#' @details Note that for `subset = 'gtex'`, there are more variables than
#' the ones we have for 'sra'. This information corresponds to file
#' GTEx_Data_V6_Annotations_SampleAttributesDS.txt available at
#' <http://www.gtexportal.org/home/datasets>. There you can find the
#' information describing these variables.
#'
#' For TCGA we acquired metadata information from 3 different sources:
#' - GDC: via a json query
#' - CGC: via json queries and a custom script to merge the tables
#' - TCGAbiolinks: we used it to parse GDC's XML files
#'
#' For more information, check <https://github.com/leekgroup/recount-website/tree/master/metadata/tcga_prep>.
#'
all_metadata <- function(subset = "sra", verbose = TRUE) {
    ## Check inputs: exactly one subset name is accepted (case-insensitive).
    ## The length is verified first so that a vector (or empty) input is
    ## reported as a length problem instead of a membership problem.
    stopifnot(length(subset) == 1)
    subset <- tolower(subset)
    stopifnot(subset %in% c("sra", "gtex", "tcga"))

    ## Build the remote file name, its URL and the local destination inside
    ## the session's temporary directory
    metafile <- paste0("metadata_clean_", subset, ".Rdata")
    url <- paste0(
        "https://github.com/leekgroup/recount-website/blob/master/metadata/",
        metafile, "?raw=true"
    )
    destfile <- file.path(tempdir(), metafile)

    ## Download file
    if (verbose) {
        message(paste(Sys.time(), "downloading the metadata to", destfile))
    }
    download_retry(
        url = url,
        destfile = destfile,
        mode = "wb"
    )

    ## Load the file into a dedicated environment instead of this function's
    ## frame: load() restores every object stored in the file, so loading it
    ## here could silently overwrite local variables such as 'destfile'.
    loaded <- new.env(parent = emptyenv())
    load(destfile, envir = loaded)

    ## Fail loudly if the file does not hold the expected object, rather than
    ## returning NULL and deferring the failure to the caller
    if (!exists("metadata_clean", envir = loaded, inherits = FALSE)) {
        stop(
            "The downloaded file '", destfile,
            "' does not contain an object named 'metadata_clean'.",
            call. = FALSE
        )
    }

    loaded[["metadata_clean"]]
}