-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathDE_read_data.R
119 lines (86 loc) · 4.75 KB
/
DE_read_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
###### Prepare German pop. data for sdcSpatial #####
# ----- load libraries -----
library(sf) # handling shape data
library(data.table) # fast import and data manipulation
library(dplyr) # convenience
library(ggplot2) # (optional) visualization
# ----- get required data -----
data_folder <- file.path(getwd(), "sdcSpatialExperiment_DE_ext") # customize if needed
web_pop <- paste0("https://www.zensus2011.de/SharedDocs/Downloads/DE/Pressemitteilung/",
"DemografischeGrunddaten/csv_Bevoelkerung_100m_Gitter.zip",
"?__blob=publicationFile&v=2")
web_shp <- paste0("https://daten.gdz.bkg.bund.de/produkte/vg/vg250_ebenen_0101/aktuell/",
"vg250_01-01.utm32s.shape.ebenen.zip")
web_100km <- paste0("https://daten.gdz.bkg.bund.de/produkte/sonstige/geogitter/aktuell/",
"DE_Grid_ETRS89-LAEA_100km.gpkg.zip")
web_100m <- paste0("https://daten.gdz.bkg.bund.de/produkte/sonstige/geogitter/aktuell/",
"DE_Grid_ETRS89-LAEA_100m.csv.zip")
# download files (may need to raise timeout in R options for the larger files)
download.file(web_pop, paste0(data_folder, "/csv_Bevoelkerung_100m_Gitter.zip"),
mode = "wb") # ~ 105 MB
download.file(web_shp, paste0(data_folder, "/vg250_01-01.utm32s.shape.ebenen.zip"),
mode = "wb") # ~ 66 MB
download.file(web_100km, paste0(data_folder, "/DE_Grid_ETRS89-LAEA_100km.gpkg.zip"),
mode = "wb") # ~ 1 MB
download.file(web_100m, paste0(data_folder, "/DE_Grid_ETRS89-LAEA_100m.zip"),
mode = "wb") # ~ 291 MB
# unzip files
unzip(paste0(data_folder, "/csv_Bevoelkerung_100m_Gitter.zip"), exdir = data_folder)
unzip(paste0(data_folder, "/vg250_01-01.utm32s.shape.ebenen.zip"), exdir = data_folder)
unzip(paste0(data_folder, "/DE_Grid_ETRS89-LAEA_100km.gpkg.zip"), exdir = data_folder)
unzip(paste0(data_folder, "/DE_Grid_ETRS89-LAEA_100m.zip"), exdir = data_folder)
path_grid_100m <- "DE_Grid_ETRS89-LAEA_100m/geogitter"
shp_grid_100km <- "DE_Grid_ETRS89-LAEA_100km.gpkg/geogitter/DE_Grid_ETRS89-LAEA_100km.gpkg"
shp_admi <- "vg250_01-01.utm32s.shape.ebenen/vg250_ebenen_0101/VG250_LAN.shp"
data_pop <- "Zensus_Bevoelkerung_100m-Gitter.csv"
# ----- select data -----
# The 100m x 100m grid is delivered in 100km x 100km chunks for the sake of file size.
# We start with the large grid in order to decide which chunks to load.
# read large grid
grid_100km <- st_read(file.path(data_folder, shp_grid_100km))
# In order to do the analysis for a single federal state, load state borders
admi_data <- st_read(file.path(data_folder, shp_admi)) %>%
filter(GF == 4) %>% # use only land area
st_transform(st_crs(grid_100km)) # UTM to LAEA
# select a federal state to work with
admi_data$GEN
(fedstate <- admi_data$GEN[1])
fedstate_ags <- admi_data[admi_data$GEN == fedstate, ]$AGS # state identifier
# which 100km x 100km must be loaded for that state?
grid_load <- grid_100km[st_intersects(admi_data[admi_data$GEN == fedstate, ],
grid_100km)[[1]], ]$id
# (optional) visualize the selected chunks
ggplot() +
geom_sf(data = grid_100km) +
geom_sf(data = admi_data, fill = NA) +
geom_sf(data = admi_data[admi_data$AGS == fedstate_ags, ], fill = "cyan") +
geom_sf(data = grid_100km[grid_100km$id %in% grid_load, ],
fill = NA, color = "red")
# ----- read data -----
# read in selected chunks
grd <- vector("list", length = length(grid_load))
for(i in seq(grid_load)) {
grd[[i]] <- fread(file.path(data_folder, path_grid_100m,
paste0(grid_load[i], "_DE_Grid_ETRS89-LAEA_100m.csv")),
colClasses = c("character", rep("numeric", 10), "character"))
}
# combining selected chunks
grid_100m <- dplyr::bind_rows(grd)[, c(1:5, 12)]
names(grid_100m) <- c("cell_id", "x_sw", "y_sw", "x_mp", "y_mp", "ags")
# get rid of overlaps with other fed. states
grid_100m <- filter(grid_100m, substr(ags, 1, 2) == fedstate_ags)
# ----- add pop. numbers -----
# read in population data
pop <- fread(file.path(data_folder, data_pop)) %>%
dplyr::select(Gitter_ID_100m, Einwohner) %>%
dplyr::filter(Einwohner > 0)
# merge population to grid cells
pop_100m <- merge(grid_100m, pop, by.x = "cell_id", by.y = "Gitter_ID_100m")
sum(pop_100m$Einwohner) # inhabitants of selected state (w/o suppressed cells)
# expand to record-level data
pop_100m <- pop_100m[, .(1:Einwohner, x_mp, y_mp), by = .(cell_id)] %>%
rename(person_id_cell = V1)
pop_100m$person_id <- 1:nrow(pop_100m)
# ----- save data -----
save(pop_100m, grid_100m, file = paste0(data_folder, "/pop_data_DE.RData"), compress = TRUE)
rm(list = ls())