Skip to content

Commit f463bd8

Browse files
author
rafapereirabr
committed
prep census tract other years
1 parent 97277d6 commit f463bd8

File tree

3 files changed

+171
-115
lines changed

3 files changed

+171
-115
lines changed

r-package/NEWS.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# geobr v1.9.0
1+
# geobr v1.9.0 dev
22

33
**Major changes**
44

r-package/prep_data/prep_census_tract.R

+158-104
Original file line numberDiff line numberDiff line change
@@ -23,142 +23,178 @@ library(future)
2323
source("./prep_data/prep_functions.R")
2424

2525

26-
# Set a root directory
27-
root_dir <- "L:////# DIRUR #//ASMEQ//geobr//data-raw//setores_censitarios"
28-
setwd(root_dir)
2926

3027

3128

32-
# If the data set is updated regularly, you should create a function that will have
33-
# a `date` argument download the data
29+
#### URL of the census tract (setores censitarios) data for the selected year -----------------
3430

35-
36-
37-
38-
#### 0. Download original data sets from IBGE ftp -----------------
39-
40-
# setores 2010
31+
if(year == 2010){
4132
ftp <- "ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2010/setores_censitarios_shp/"
33+
year_dir <- 2010
34+
}
4235

43-
# setores 2000 rural
44-
ftp2 <- "ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2000/setor_rural/projecao_geografica/censo_2000/e500_arcview_shp/uf/"
45-
46-
# setores 2000 urbano
47-
ftp3 <- "ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2000/setor_urbano/"
48-
49-
# setores 2019
50-
ftp4 <- "ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/2019/Malha_de_setores_(shp)_por_UFs/"
36+
if(year == '2000_rural'){
37+
ftp <- "ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2000/setor_rural/projecao_geografica/censo_2000/e500_arcview_shp/uf/"
38+
year_dir <- '2000_rural'
39+
}
5140

52-
# setores 2020
53-
ftp5 <- "ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/2020/Malha_de_setores_(shp)_por_UFs/"
41+
if(year == '2000_urbano'){
42+
ftp <- "ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2000/setor_urbano/"
43+
year_dir <- '2000_urbano'
44+
}
5445

46+
if(year == 2019){
47+
ftp <- "ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/2019/Malha_de_setores_(shp)_por_UFs/"
48+
year_dir <- 2019
49+
}
5550

56-
# lista de ftp de 2010,2019 e 2020
57-
ftplist <- c(ftp, ftp4, ftp5)
58-
ftplist <- c(ftp4, ftp5)
51+
if(year == 2020){
52+
ftp <- "ftp://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/2020/Malha_de_setores_(shp)_por_UFs/"
53+
year_dir <- 2020
54+
}
5955

60-
for (ftp1 in ftplist){ # ftp1 <- FTPLIST[3]
6156

62-
# year directory
63-
if(ftp1 == ftp) { year_dir <- 2010}
64-
if(ftp1 %in% c(ftp2, ftp3)) { year_dir <- 2010}
65-
if(ftp1 ==ftp4) { year_dir <- 2019}
66-
if(ftp1 ==ftp5) { year_dir <- 2020}
57+
if(year == 2022){
58+
ftp <- "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2022_preliminar/setores/gpkg/BR/BR_Malha_Preliminar_2022.zip"
59+
year_dir <- 2022
60+
}
6761

68-
dir.create( paste0('./', year_dir),showWarnings = F )
6962

70-
### setor censitario censo
71-
filenames = getURL(ftp1, ftp.use.epsv = FALSE, dirlistonly = TRUE)
72-
filenames <- strsplit(filenames, "\r\n")
73-
filenames = unlist(filenames)
74-
filenames <- filenames[!grepl('leia_me', filenames)]
7563

76-
# filesurl<-paste(ftp, filenames[9],"/", sep = "")
77-
# filesurl<-getURL(filesurl, ftp.use.epsv = FALSE, dirlistonly = TRUE)
78-
# filesurl<-strsplit(filesurl, "\r\n")
79-
# filesurl<-unlist(filesurl)
64+
# Create the raw-data and destination directories for the selected year.
# `year` must already be defined by the time this section runs.
raw_dir <- paste0("./data_raw/census_tracts/", year)
dest_dir <- paste0("./data/census_tracts/", year)

# Only create what is missing: dir.create() warns when the directory already
# exists, which happens every time the script is re-run for the same year.
# A genuine failure to create the directory still raises its warning.
if (!dir.exists(raw_dir)) {
  dir.create(raw_dir, recursive = TRUE)
}
if (!dir.exists(dest_dir)) {
  dir.create(dest_dir, recursive = TRUE)
}
8069

81-
#fazendo download dos dados zipados
82-
for (filename in filenames) {
83-
filesurl<-paste(ftp1, filename,"/", sep = "")
84-
filesurl<-getURL(filesurl, ftp.use.epsv = FALSE, dirlistonly = TRUE)
85-
filesurl<-strsplit(filesurl, "\r\n")
86-
filesurl<-unlist(filesurl)
8770

88-
fileyear <- regmatches(filesurl, gregexpr("[0-9]+",filesurl))
89-
fileyear <- unlist(fileyear)
90-
dir.fonte <- paste0("./",fileyear,"/",filename)
9171

92-
for (fonte in dir.fonte){ # fonte <- dir.fonte[1]
93-
dir.create(fonte, recursive = T)
9472

95-
for (files in filesurl){ # files <- filesurl[1]
96-
download.file(paste(ftp1, filename,"/", files, sep = ""),paste(fonte,"/",files, sep = ""))
97-
}
98-
}
99-
}
100-
}
10173

74+
#### 0. Download original data sets from IBGE ftp -----------------
10275

103-
### setor censitario rural censo 2000
104-
filenames = getURL(ftp2, ftp.use.epsv = FALSE, dirlistonly = TRUE)
105-
filenames <- strsplit(filenames, "\r\n")
106-
filenames = unlist(filenames)
107-
filenames <- filenames[!grepl('leia_me', filenames)]
76+
if(year == 2022){
10877

78+
dest_file <- download_file(file_url = ftp)
10979

110-
#fazendo download dos dados zipados
111-
for (filename in filenames) {
112-
filesurl<-paste(ftp2, filename,"/", sep = "")
113-
filesurl<-getURL(filesurl, ftp.use.epsv = FALSE, dirlistonly = TRUE)
114-
filesurl<-strsplit(filesurl, "\r\n")
115-
filesurl<-unlist(filesurl)
80+
temp_dir <- tempdir()
11681

117-
dir.fonte <- paste0("//Storage6/usuarios/# DIRUR #/ASMEQ/geobr//data-raw//setores_censitarios/censo_2000/",filename)
118-
dir.create(dir.fonte,recursive = T)
82+
unzip(dest_file, exdir = temp_dir)
11983

120-
for (files in filesurl) {
121-
download.file(paste(ftp2, filename,"/",files, sep = ""),paste(dir.fonte,"/",files,sep = ""))
122-
}
84+
local_file <- unzip_fun(dest_file)
12385
}
12486

125-
### setor censitario urbano censo 2000
126-
127-
filenames = getURL(ftp3, ftp.use.epsv = FALSE, dirlistonly = TRUE)
128-
filenames <- strsplit(filenames, "\r\n")
129-
filenames = unlist(filenames)
130-
filenames <- filenames[!grepl('leia_me', filenames)]
131-
132-
133-
dir.fonte <- paste0("//Storage6/usuarios/# DIRUR #/ASMEQ/geobr//data-raw//setores_censitarios/censo_2000/Urbano/")
134-
filespasta<-list.files(dir.fonte)
135-
filespasta<-unlist(filespasta)
136-
difflies<-setdiff(filenames,filespasta)
137-
138-
#fazendo download dos dados zipados
13987

140-
for (filename in difflies) {
141-
filesurl<-paste(ftp3, filename,"/", sep = "")
142-
filesurl<-getURL(filesurl, ftp.use.epsv = FALSE, dirlistonly = TRUE)
143-
filesurl<-strsplit(filesurl, "\r\n")
144-
filesurl<-unlist(filesurl)
14588

146-
dir.fonte <- paste0("//Storage6/usuarios/# DIRUR #/ASMEQ/geobr//data-raw//setores_censitarios/censo_2000/Urbano/",filename)
147-
dir.create(dir.fonte,recursive = T)
14889

14990

150-
for (files in filesurl) {
15191

152-
if ( grepl("3300704",files)) { download.file(paste(ftp3, filename,"/",files,"/",files,"_2000.zip", sep = ""),paste(dir.fonte,"/",files,".zip",sep = ""),quiet = T)
153-
}
154-
else if (grepl(".zip",files)){
155-
download.file(paste(ftp3, filename,"/",files, sep = ""),paste(dir.fonte,"/",files,sep = ""),quiet = T)
156-
} else {
157-
download.file(paste(ftp3, filename,"/",files,"/",files,".zip", sep = ""),paste(dir.fonte,"/",files,".zip",sep = ""),quiet = T)
158-
}
159-
}
160-
}
16192

93+
# #6666666666666666666666666666 (previous download code, commented out below)
94+
# for (ftp1 in ftplist){ # ftp1 <- FTPLIST[3]
95+
#
96+
#
97+
# # create dir
98+
# dir.create( paste0('./data/census_tract/', year_dir), recursive = T, showWarnings = T )
99+
#
100+
# ### setor censitario censo
101+
#
102+
# filenames <- list_folders(ftp)
103+
#
104+
# filenames <- strsplit(filenames, "\r\n")
105+
# filenames = unlist(filenames)
106+
# filenames <- filenames[!grepl('leia_me', filenames)]
107+
# filenames <- filenames[!grepl('?C=', filenames)]
108+
# filenames <- filenames[!grepl('http', filenames)]
109+
# filenames <- filenames[!grepl('Censos/Censo_Demografico_2022', filenames)]
110+
#
111+
#
112+
# #fazendo download dos dados zipados
113+
# for (f in filenames) {
114+
# filesurl<-paste(ftp, "/",f,"/", sep = "")
115+
#
116+
# filesurl <- list_folders(filesurl)
117+
# filesurl <- filesurl[grepl('gpkg', filenames)]
118+
#
119+
#
120+
# filesurl<-getURL(filesurl, ftp.use.epsv = FALSE, dirlistonly = TRUE)
121+
# filesurl<-strsplit(filesurl, "\r\n")
122+
# filesurl<-unlist(filesurl)
123+
#
124+
# fileyear <- regmatches(filesurl, gregexpr("[0-9]+",filesurl))
125+
# fileyear <- unlist(fileyear)
126+
# dir.fonte <- paste0("./",fileyear,"/",f)
127+
#
128+
# for (fonte in dir.fonte){ # fonte <- dir.fonte[1]
129+
# dir.create(fonte, recursive = T)
130+
#
131+
# for (files in filesurl){ # files <- filesurl[1]
132+
# download.file(paste(ftp1, f,"/", files, sep = ""),paste(fonte,"/",files, sep = ""))
133+
# }
134+
# }
135+
# }
136+
# }
137+
#
138+
#
139+
# ### setor censitario rural censo 2000
140+
# filenames = getURL(ftp2, ftp.use.epsv = FALSE, dirlistonly = TRUE)
141+
# filenames <- strsplit(filenames, "\r\n")
142+
# filenames = unlist(filenames)
143+
# filenames <- filenames[!grepl('leia_me', filenames)]
144+
#
145+
#
146+
# #fazendo download dos dados zipados
147+
# for (filename in filenames) {
148+
# filesurl<-paste(ftp2, filename,"/", sep = "")
149+
# filesurl<-getURL(filesurl, ftp.use.epsv = FALSE, dirlistonly = TRUE)
150+
# filesurl<-strsplit(filesurl, "\r\n")
151+
# filesurl<-unlist(filesurl)
152+
#
153+
# dir.fonte <- paste0("//Storage6/usuarios/# DIRUR #/ASMEQ/geobr//data-raw//setores_censitarios/censo_2000/",filename)
154+
# dir.create(dir.fonte,recursive = T)
155+
#
156+
# for (files in filesurl) {
157+
# download.file(paste(ftp2, filename,"/",files, sep = ""),paste(dir.fonte,"/",files,sep = ""))
158+
# }
159+
# }
160+
#
161+
# ### setor censitario urbano censo 2000
162+
#
163+
# filenames = getURL(ftp3, ftp.use.epsv = FALSE, dirlistonly = TRUE)
164+
# filenames <- strsplit(filenames, "\r\n")
165+
# filenames = unlist(filenames)
166+
# filenames <- filenames[!grepl('leia_me', filenames)]
167+
#
168+
#
169+
# dir.fonte <- paste0("//Storage6/usuarios/# DIRUR #/ASMEQ/geobr//data-raw//setores_censitarios/censo_2000/Urbano/")
170+
# filespasta<-list.files(dir.fonte)
171+
# filespasta<-unlist(filespasta)
172+
# difflies<-setdiff(filenames,filespasta)
173+
#
174+
# #fazendo download dos dados zipados
175+
#
176+
# for (filename in difflies) {
177+
# filesurl<-paste(ftp3, filename,"/", sep = "")
178+
# filesurl<-getURL(filesurl, ftp.use.epsv = FALSE, dirlistonly = TRUE)
179+
# filesurl<-strsplit(filesurl, "\r\n")
180+
# filesurl<-unlist(filesurl)
181+
#
182+
# dir.fonte <- paste0("//Storage6/usuarios/# DIRUR #/ASMEQ/geobr//data-raw//setores_censitarios/censo_2000/Urbano/",filename)
183+
# dir.create(dir.fonte,recursive = T)
184+
#
185+
#
186+
# for (files in filesurl) {
187+
#
188+
# if ( grepl("3300704",files)) { download.file(paste(ftp3, filename,"/",files,"/",files,"_2000.zip", sep = ""),paste(dir.fonte,"/",files,".zip",sep = ""),quiet = T)
189+
# }
190+
# else if (grepl(".zip",files)){
191+
# download.file(paste(ftp3, filename,"/",files, sep = ""),paste(dir.fonte,"/",files,sep = ""),quiet = T)
192+
# } else {
193+
# download.file(paste(ftp3, filename,"/",files,"/",files,".zip", sep = ""),paste(dir.fonte,"/",files,".zip",sep = ""),quiet = T)
194+
# }
195+
# }
196+
# }
197+
#
162198

163199

164200
######## 1. Unzip original data sets downloaded from IBGE -----------------
@@ -260,6 +296,24 @@ gc(reset = T)
260296

261297
#### 3. Save original data sets downloaded from IBGE in compact .rds format-----------------
262298

299+
# Census 2022: read the geopackage found under `raw_dir` and save it next to
# the original file as a compressed .rds.
if (year == 2022) {

  # List geopackage files. `pattern` is a regular expression, so the dot is
  # escaped and the match anchored at the end of the name; a bare ".gpkg"
  # would also match names that merely contain "gpkg" after any character.
  all_shapes <- list.files(
    raw_dir,
    full.names = TRUE,
    recursive = TRUE,
    pattern = "\\.gpkg$"
  )

  # The read below expects one data source; fail early with a clear message
  # instead of passing zero or several paths on to sf::st_read().
  if (length(all_shapes) != 1L) {
    stop(
      "Expected exactly one .gpkg file in ", raw_dir,
      ", found ", length(all_shapes),
      call. = FALSE
    )
  }

  # read to memory
  df <- sf::st_read(all_shapes)
  gc()

  # file name without extension (everything from the first dot on is dropped)
  file_name <- basename(all_shapes)
  file_name <- gsub("\\..*", "", file_name)

  # save in .rds
  saveRDS(df, file = paste0(raw_dir, "/", file_name, ".rds"), compress = TRUE)
}
316+
263317
# List shapes for all years
264318
all_shapes <- list.files(full.names = T, recursive = T, pattern = ".shp|.SHP")
265319
head(all_shapes)

r-package/prep_data/update_metadata_table.R

+12-10
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,15 @@ a <- metadata[geo=='health_facilities']
101101
######### Step 3 - upload data to github ----------------------
102102
all_files <- list.files("//storage1/geobr/data_gpkg", full.names = T, recursive = T)
103103

104-
all_files <- all_files[all_files %like% 'municipality']
105-
all_files <- all_files[all_files %like% '2021|2022']
104+
all_files <- all_files[all_files %like% 'census_tract']
105+
all_files <- all_files[all_files %like% '2022']
106106

107107
# upload data
108108
piggyback::pb_upload(all_files,
109109
"ipeaGIT/geobr",
110-
"v1.7.0",
111-
.token = ttt)
110+
"v1.7.0"
111+
#,.token = ttt
112+
)
112113

113114
#' https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting
114115

@@ -173,12 +174,13 @@ piggyback::pb_upload(to_go,
173174
# save updated metadata table
174175
# readr::write_csv(metadata,"//storage1/geobr/metadata/metadata_1.7.0_gpkg.csv")
175176

176-
# upload updated metadata table github
177-
piggyback::pb_upload("//storage1/geobr/metadata/metadata_1.7.0_gpkg.csv",
178-
"ipeaGIT/geobr",
179-
"v1.7.0",
180-
.token = ttt)
181-
177+
# # upload the updated metadata table to GitHub
178+
# piggyback::pb_upload("//storage1/geobr/metadata/metadata_1.7.0_gpkg.csv",
179+
# "ipeaGIT/geobr",
180+
# "v1.7.0"
181+
# #, .token = ttt
182+
# )
183+
#
182184

183185

184186

0 commit comments

Comments
 (0)