Skip to content

Commit 173a7b1

Browse files
committed
Merge branch 'master' of https://github.com/ipeaGIT/geobr
# Conflicts: # r-package/DESCRIPTION
2 parents 87be5f7 + 5d79577 commit 173a7b1

12 files changed

+212
-187
lines changed

data_prep/R/health_facilities.R

+24-14
Original file line numberDiff line numberDiff line change
@@ -84,20 +84,23 @@ update_health_facilities <- function(){
8484
'date_update', 'year_update'))
8585

8686

87-
87+
# deal with points with missing coordinates
8888
head(dt)
89-
# dt[is.na(lat) | is.na(lon),]
90-
# dt[lat==0,]
89+
dt[is.na(lat) | is.na(lon),]
90+
dt[lat==0,]
91+
92+
# identify which points should have empty geo
93+
dt[is.na(lat) | is.na(lon), empty_geo := T]
94+
95+
dt[code_cnes=='0000930', lat]
96+
dt[code_cnes=='0000930', lon]
9197

92-
# dt[code_cnes=='0000930', lat]
93-
# dt[code_cnes=='0000930', lon]
94-
#
95-
# # replace NAs with 0
96-
# data.table::setnafill(dt,
97-
# type = "const",
98-
# fill = 0,
99-
# cols=c("lat","lon")
100-
# )
98+
# replace NAs with 0
99+
data.table::setnafill(dt,
100+
type = "const",
101+
fill = 0,
102+
cols=c("lat","lon")
103+
)
101104

102105

103106

@@ -107,18 +110,25 @@ update_health_facilities <- function(){
107110
crs = "+proj=longlat +datum=WGS84")
108111

109112

113+
# set the geometry to an empty point for rows flagged with empty_geo
114+
# solution from: https://gis.stackexchange.com/questions/459239/how-to-set-a-geometry-to-na-empty-for-some-features-of-an-sf-dataframe-in-r
115+
temp_sf$geometry[temp_sf$empty_geo == T] = sf::st_point()
116+
117+
subset(temp_sf, code_cnes=='0000930')
118+
119+
110120
# Change CRS to SIRGAS Geodetic reference system "SIRGAS2000" , CRS(4674).
111121
temp_sf <- harmonize_projection(temp_sf)
112122

113123

114124
# create folder to save the data
115-
dest_dir <- paste0('./data/health_facilities/', geobr_date)
125+
dest_dir <- paste0('./data/health_facilities/', geobr_date,'/')
116126
dir.create(path = dest_dir, recursive = TRUE, showWarnings = FALSE)
117127

118128

119129
# Save raw file in sf format
120130
sf::st_write(temp_sf,
121-
dsn= paste0(dest_dir, 'cnes_', date_update,".gpkg"),
131+
dsn= paste0(dest_dir, 'cnes_', geobr_date,".gpkg"),
122132
overwrite = TRUE,
123133
append = FALSE,
124134
delete_dsn = T,

data_prep/R/schools.R

+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#> DATASET: schools 2020
2+
#> Source: INEP -
3+
#> https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/inep-data/catalogo-de-escolas
4+
#>
5+
#: scale
6+
#> Metadata:
7+
# Titulo: schools
8+
#' Frequencia de atualizacao: anual
9+
#'
10+
#' Forma de apresentação: Shape
11+
#' Linguagem: Pt-BR
12+
#' Character set: Utf-8
13+
#'
14+
#' Resumo: Pontos com coordenadas geográficas das escolas do censo escolar
15+
#' Informações adicionais: Dados produzidos pelo INEP. Os dados de escolas e sua
16+
#' geolocalização são atualizados pelo INEP continuamente. Para finalidade do geobr,
17+
#' esses dados precisam ser baixados uma vez ao ano
18+
19+
20+
21+
22+
update_schools <- function(){
23+
24+
25+
# If the data set is updated regularly, you should create a function that will have
26+
# a `date` argument to download the data
27+
update <- 2023
28+
date_update <- Sys.Date()
29+
30+
# date shown to geobr user
31+
geobr_date <- gsub('-', '' , date_update)
32+
geobr_date <- substr(geobr_date, 1, 6)
33+
34+
35+
# download manual
36+
# https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/inep-data/catalogo-de-escolas
37+
dt <- fread('C:/Users/r1701707/Downloads/Análise - Tabela da lista das escolas - Detalhado.csv',
38+
encoding = 'UTF-8')
39+
head(dt)
40+
41+
42+
##### 4. Rename columns -------------------------
43+
head(dt)
44+
45+
df <- dplyr::select(dt,
46+
abbrev_state = 'UF',
47+
name_muni = 'Município',
48+
code_school = 'Código INEP',
49+
name_school = 'Escola',
50+
education_level = 'Etapas e Modalidade de Ensino Oferecidas',
51+
education_level_others = 'Outras Ofertas Educacionais',
52+
admin_category = 'Categoria Administrativa',
53+
address = 'Endereço',
54+
phone_number = 'Telefone',
55+
government_level = 'Dependência Administrativa',
56+
private_school_type = 'Categoria Escola Privada',
57+
private_government_partnership = 'Conveniada Poder Público',
58+
regulated_education_council = 'Regulamentação pelo Conselho de Educação',
59+
service_restriction ='Restrição de Atendimento',
60+
size = 'Porte da Escola',
61+
urban = 'Localização',
62+
location_type = 'Localidade Diferenciada',
63+
date_update = 'date_update',
64+
y = 'Latitude',
65+
x = 'Longitude'
66+
)
67+
68+
69+
70+
71+
head(df)
72+
73+
74+
# add update date columns
75+
df[, date_update := as.character(date_update)]
76+
77+
78+
# deal with points with missing coordinates
79+
head(df)
80+
df[is.na(x) | is.na(y),]
81+
df[x==0,]
82+
83+
# identify which points should have empty geo
84+
df[is.na(x) | is.na(y), empty_geo := T]
85+
86+
df[code_school=='11000180', x]
87+
88+
89+
# replace NAs with 0
90+
data.table::setnafill(df,
91+
type = "const",
92+
fill = 0,
93+
cols=c("x","y")
94+
)
95+
96+
97+
98+
# Convert original data frame into sf
99+
temp_sf <- sf::st_as_sf(x = df,
100+
coords = c("x", "y"),
101+
crs = "+proj=longlat +datum=WGS84")
102+
103+
104+
# set the geometry to an empty point for rows flagged with empty_geo
105+
# solution from: https://gis.stackexchange.com/questions/459239/how-to-set-a-geometry-to-na-empty-for-some-features-of-an-sf-dataframe-in-r
106+
temp_sf$geometry[temp_sf$empty_geo == T] = sf::st_point()
107+
108+
subset(temp_sf, code_school=='11000180')
109+
110+
111+
# Change CRS to SIRGAS Geodetic reference system "SIRGAS2000" , CRS(4674).
112+
temp_sf <- harmonize_projection(temp_sf)
113+
114+
115+
# create folder to save the data
116+
dest_dir <- paste0('./data/schools/', update,'/')
117+
dir.create(path = dest_dir, recursive = TRUE, showWarnings = FALSE)
118+
119+
120+
# Save raw file in sf format
121+
sf::st_write(temp_sf,
122+
dsn= paste0(dest_dir, 'schools_', update,".gpkg"),
123+
overwrite = TRUE,
124+
append = FALSE,
125+
delete_dsn = T,
126+
delete_layer = T,
127+
quiet = T
128+
)
129+
130+
}

data_prep/R/support_fun.R

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#### Support functions to use in the preprocessing of the data
22

3-
# library(dplyr)
4-
# library(data.table)
3+
library(dplyr)
4+
library(data.table)
55
# library(mapview)
66
# mapviewOptions(platform = 'deckgl')
77

@@ -146,7 +146,7 @@ add_region_info <- function(temp_sf, column){
146146
code_region==2, 'Nordeste',
147147
code_region==3, 'Sudeste',
148148
code_region==4, 'Sul',
149-
code_region==5, 'Centro Oeste',
149+
code_region==5, 'Centro-Oeste',
150150
default = NA))
151151
return(temp_sf)
152152
}

r-package/DESCRIPTION

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ Suggests:
4646
rmarkdown,
4747
scales,
4848
testthat
49+
RoxygenNote: 7.3.1
4950
RoxygenNote: 7.2.3
5051
Roxygen: list(markdown = TRUE)
5152
VignetteBuilder: knitr

r-package/NEWS.md

+12
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
# geobr v1.9.0
2+
3+
**Major changes**
4+
5+
- Function `read_health_facilities()` now has a new parameter `date`, which allows users to access data for different reference dates. The plan is to update this data set at least once per year.
6+
7+
8+
**New data**
9+
- schools for 2023
10+
- health facilities for 202303
11+
12+
113
# geobr v1.8.2
214

315
**CRAN request**

r-package/R/read_health_facilities.R

+6-3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
#' update is registered in the database in the columns `date_update` and
1717
#' `year_update`. More information in the CNES data set available at \url{https://dados.gov.br/}.
1818
#' These data use Geodetic reference system "SIRGAS2000" and CRS(4674).
19+
#'
20+
#' @param date Numeric. Date of the data in YYYYMM format. Defaults to `202303`,
21+
#' which was the latest data available at the time of this update.
1922
#' @template showProgress
2023
#'
2124
#' @return An `"sf" "data.frame"` object
@@ -25,12 +28,12 @@
2528
#'
2629
#' @examplesIf identical(tolower(Sys.getenv("NOT_CRAN")), "true")
2730
#' # Read all health facilities of the whole country
28-
#' h <- read_health_facilities()
31+
#' h <- read_health_facilities( date = 202303)
2932
#'
30-
read_health_facilities <- function( showProgress=TRUE ){
33+
read_health_facilities <- function(date = 202303, showProgress = TRUE){
3134

3235
# Get metadata with data url addresses
33-
temp_meta <- select_metadata(geography="health_facilities", year=2015, simplified=F)
36+
temp_meta <- select_metadata(geography="health_facilities", year=date, simplified=F)
3437

3538
# list paths of files to download
3639
file_url <- as.character(temp_meta$download_path)

r-package/R/utils.R

+3-3
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,16 @@ select_data_type <- function(temp_meta, simplified=NULL){
3838
select_year_input <- function(temp_meta, y=year){
3939

4040
# NULL
41-
if (is.null(y)){ stop(paste0("Error: Invalid Value to argument 'year'. It must be one of the following: ",
41+
if (is.null(y)){ stop(paste0("Error: Invalid Value to argument 'year/date'. It must be one of the following: ",
4242
paste(unique(temp_meta$year),collapse = " "))) }
4343

4444
# valid input
45-
else if (y %in% temp_meta$year){ message(paste0("Using year ", y))
45+
else if (y %in% temp_meta$year){ message(paste0("Using year/date ", y))
4646
temp_meta <- subset(temp_meta, year == y)
4747
return(temp_meta) }
4848

4949
# invalid input
50-
else { stop(paste0("Error: Invalid Value to argument 'year'. It must be one of the following: ",
50+
else { stop(paste0("Error: Invalid Value to argument 'year/date'. It must be one of the following: ",
5151
paste(unique(temp_meta$year), collapse = " ")))
5252
}
5353
}

r-package/man/geobr.Rd

-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r-package/man/read_health_facilities.Rd

+5-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)