-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtidycensus_Example2.r
272 lines (251 loc) · 19.5 KB
/
tidycensus_Example2.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
######################################################
# tidycensus_Example2.r
# Example #2
# More Complex Examples of TIDYCENSUS
# Multiple Years, Multiple Geographies, Multiple Variables
# Each record (row) is one geography, with columns for
# each variable/year combination, eg, TotalPop_05_E, TotalPop_07_E, etc.
# Eight variables by 14 years (2005-2018)
# Prepared by Chuck Purvis, Hayward, California
######################################################
# Step 0: Load relevant libraries into each R-session.
library(tidyverse)
library(tidycensus)
# The get_acs function is run for each year of the single-year ACS data, from 2005 to 2018.
# Note that group quarters data was not collected in 2005, but started in 2006.
# Note the "_05_" included in the variable name in the first data "pull". That's a mnemonic
# device that tells us it's for the year 2005.
# Example 2.1 through 2.14: Run get_acs for large California Places, 2005-2018
# Example 2.15: Merge together data frames into a VERY wide database...lots of columns!
# Example 2.16: Merge in a file of Large San Francisco Bay Area places, and subset file.
#-------------------------------------------------------------------------------------------
place05 <- get_acs(survey="acs1", year=2005, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_05_ = "B06001_001", # Total Population
Med_HHInc_05_ = "B19013_001", # Median Household Income
Agg_HHInc_05_ = "B19025_001", # Aggregate Household Income
HHldPop_05_ = "B11002_001", # Population in Households
Househlds_05_ = "B25003_001", # Total Households
Owner_OccDU_05_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_05_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_05_ = "B25077_001")) # Median Value of Owner-Occ DUs
place05$Avg_HHSize_05 <- place05$HHldPop_05_E / place05$Househlds_05_E
place05$MeanHHInc_05 <- place05$Agg_HHInc_05_E / place05$Househlds_05_E
#-------------------------------------------------------------------------------------------
place06 <- get_acs(survey="acs1", year=2006, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_06_ = "B06001_001", # Total Population
Med_HHInc_06_ = "B19013_001", # Median Household Income
Agg_HHInc_06_ = "B19025_001", # Aggregate Household Income
HHldPop_06_ = "B11002_001", # Population in Households
Househlds_06_ = "B25003_001", # Total Households
Owner_OccDU_06_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_06_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_06_ = "B25077_001")) # Median Value of Owner-Occ DUs
place06$Avg_HHSize_06 <- place06$HHldPop_06_E / place06$Househlds_06_E
place06$MeanHHInc_06 <- place06$Agg_HHInc_06_E / place06$Househlds_06_E
#-------------------------------------------------------------------------------------------
place07 <- get_acs(survey="acs1", year=2007, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_07_ = "B06001_001", # Total Population
Med_HHInc_07_ = "B19013_001", # Median Household Income
Agg_HHInc_07_ = "B19025_001", # Aggregate Household Income
HHldPop_07_ = "B11002_001", # Population in Households
Househlds_07_ = "B25003_001", # Total Households
Owner_OccDU_07_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_07_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_07_ = "B25077_001")) # Median Value of Owner-Occ DUs
place07$Avg_HHSize_07 <- place07$HHldPop_07_E / place07$Househlds_07_E
place07$MeanHHInc_07 <- place07$Agg_HHInc_07_E / place07$Househlds_07_E
#-------------------------------------------------------------------------------------------
place08 <- get_acs(survey="acs1", year=2008, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_08_ = "B06001_001", # Total Population
Med_HHInc_08_ = "B19013_001", # Median Household Income
Agg_HHInc_08_ = "B19025_001", # Aggregate Household Income
HHldPop_08_ = "B11002_001", # Population in Households
Househlds_08_ = "B25003_001", # Total Households
Owner_OccDU_08_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_08_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_08_ = "B25077_001")) # Median Value of Owner-Occ DUs
place08$Avg_HHSize_08 <- place08$HHldPop_08_E / place08$Househlds_08_E
place08$MeanHHInc_08 <- place08$Agg_HHInc_08_E / place08$Househlds_08_E
#-------------------------------------------------------------------------------------------
place09 <- get_acs(survey="acs1", year=2009, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_09_ = "B06001_001", # Total Population
Med_HHInc_09_ = "B19013_001", # Median Household Income
Agg_HHInc_09_ = "B19025_001", # Aggregate Household Income
HHldPop_09_ = "B11002_001", # Population in Households
Househlds_09_ = "B25003_001", # Total Households
Owner_OccDU_09_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_09_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_09_ = "B25077_001")) # Median Value of Owner-Occ DUs
place09$Avg_HHSize_09 <- place09$HHldPop_09_E / place09$Househlds_09_E
place09$MeanHHInc_09 <- place09$Agg_HHInc_09_E / place09$Househlds_09_E
#-------------------------------------------------------------------------------------------
place10 <- get_acs(survey="acs1", year=2010, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_10_ = "B06001_001", # Total Population
Med_HHInc_10_ = "B19013_001", # Median Household Income
Agg_HHInc_10_ = "B19025_001", # Aggregate Household Income
HHldPop_10_ = "B11002_001", # Population in Households
Househlds_10_ = "B25003_001", # Total Households
Owner_OccDU_10_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_10_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_10_ = "B25077_001")) # Median Value of Owner-Occ DUs
place10$Avg_HHSize_10 <- place10$HHldPop_10_E / place10$Househlds_10_E
place10$MeanHHInc_10 <- place10$Agg_HHInc_10_E / place10$Househlds_10_E
#-------------------------------------------------------------------------------------------
place11 <- get_acs(survey="acs1", year=2011, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_11_ = "B06001_001", # Total Population
Med_HHInc_11_ = "B19013_001", # Median Household Income
Agg_HHInc_11_ = "B19025_001", # Aggregate Household Income
HHldPop_11_ = "B11002_001", # Population in Households
Househlds_11_ = "B25003_001", # Total Households
Owner_OccDU_11_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_11_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_11_ = "B25077_001")) # Median Value of Owner-Occ DUs
place11$Avg_HHSize_11 <- place11$HHldPop_11_E / place11$Househlds_11_E
place11$MeanHHInc_11 <- place11$Agg_HHInc_11_E / place11$Househlds_11_E
#-------------------------------------------------------------------------------------------
place12 <- get_acs(survey="acs1", year=2012, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_12_ = "B06001_001", # Total Population
Med_HHInc_12_ = "B19013_001", # Median Household Income
Agg_HHInc_12_ = "B19025_001", # Aggregate Household Income
HHldPop_12_ = "B11002_001", # Population in Households
Househlds_12_ = "B25003_001", # Total Households
Owner_OccDU_12_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_12_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_12_ = "B25077_001")) # Median Value of Owner-Occ DUs
place12$Avg_HHSize_12 <- place12$HHldPop_12_E / place12$Househlds_12_E
place12$MeanHHInc_12 <- place12$Agg_HHInc_12_E / place12$Househlds_12_E
#-------------------------------------------------------------------------------------------
place13 <- get_acs(survey="acs1", year=2013, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_13_ = "B06001_001", # Total Population
Med_HHInc_13_ = "B19013_001", # Median Household Income
Agg_HHInc_13_ = "B19025_001", # Aggregate Household Income
HHldPop_13_ = "B11002_001", # Population in Households
Househlds_13_ = "B25003_001", # Total Households
Owner_OccDU_13_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_13_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_13_ = "B25077_001")) # Median Value of Owner-Occ DUs
place13$Avg_HHSize_13 <- place13$HHldPop_13_E / place13$Househlds_13_E
place13$MeanHHInc_13 <- place13$Agg_HHInc_13_E / place13$Househlds_13_E
#-------------------------------------------------------------------------------------------
place14 <- get_acs(survey="acs1", year=2014, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_14_ = "B06001_001", # Total Population
Med_HHInc_14_ = "B19013_001", # Median Household Income
Agg_HHInc_14_ = "B19025_001", # Aggregate Household Income
HHldPop_14_ = "B11002_001", # Population in Households
Househlds_14_ = "B25003_001", # Total Households
Owner_OccDU_14_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_14_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_14_ = "B25077_001")) # Median Value of Owner-Occ DUs
place14$Avg_HHSize_14 <- place14$HHldPop_14_E / place14$Househlds_14_E
place14$MeanHHInc_14 <- place14$Agg_HHInc_14_E / place14$Househlds_14_E
#-------------------------------------------------------------------------------------------
place15 <- get_acs(survey="acs1", year=2015, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_15_ = "B06001_001", # Total Population
Med_HHInc_15_ = "B19013_001", # Median Household Income
Agg_HHInc_15_ = "B19025_001", # Aggregate Household Income
HHldPop_15_ = "B11002_001", # Population in Households
Househlds_15_ = "B25003_001", # Total Households
Owner_OccDU_15_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_15_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_15_ = "B25077_001")) # Median Value of Owner-Occ DUs
place15$Avg_HHSize_15 <- place15$HHldPop_15_E / place15$Househlds_15_E
place15$MeanHHInc_15 <- place15$Agg_HHInc_15_E / place15$Househlds_15_E
#-------------------------------------------------------------------------------------------
place16 <- get_acs(survey="acs1", year=2016, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_16_ = "B06001_001", # Total Population
Med_HHInc_16_ = "B19013_001", # Median Household Income
Agg_HHInc_16_ = "B19025_001", # Aggregate Household Income
HHldPop_16_ = "B11002_001", # Population in Households
Househlds_16_ = "B25003_001", # Total Households
Owner_OccDU_16_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_16_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_16_ = "B25077_001")) # Median Value of Owner-Occ DUs
place16$Avg_HHSize_16 <- place16$HHldPop_16_E / place16$Househlds_16_E
place16$MeanHHInc_16 <- place16$Agg_HHInc_16_E / place16$Househlds_16_E
#-------------------------------------------------------------------------------------------
place17 <- get_acs(survey="acs1", year=2017, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_17_ = "B06001_001", # Total Population
Med_HHInc_17_ = "B19013_001", # Median Household Income
Agg_HHInc_17_ = "B19025_001", # Aggregate Household Income
HHldPop_17_ = "B11002_001", # Population in Households
Househlds_17_ = "B25003_001", # Total Households
Owner_OccDU_17_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_17_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_17_ = "B25077_001")) # Median Value of Owner-Occ DUs
place17$Avg_HHSize_17 <- place17$HHldPop_17_E / place17$Househlds_17_E
place17$MeanHHInc_17 <- place17$Agg_HHInc_17_E / place17$Househlds_17_E
#-------------------------------------------------------------------------------------------
place18 <- get_acs(survey="acs1", year=2018, geography = "place", state = "CA",
show_call = TRUE,output="wide",
variables = c(TotalPop_18_ = "B06001_001", # Total Population
Med_HHInc_18_ = "B19013_001", # Median Household Income
Agg_HHInc_18_ = "B19025_001", # Aggregate Household Income
HHldPop_18_ = "B11002_001", # Population in Households
Househlds_18_ = "B25003_001", # Total Households
Owner_OccDU_18_= "B25003_002", # Owner-Occupied Dwelling Units
Rent_OccDU_18_ = "B25003_003", # Renter-Occupied Dwelling Units
Med_HHVal_18_ = "B25077_001")) # Median Value of Owner-Occ DUs
place18$Avg_HHSize_18 <- place18$HHldPop_18_E / place18$Househlds_18_E
place18$MeanHHInc_18 <- place18$Agg_HHInc_18_E / place18$Househlds_18_E
#####################################################################################
# Example 2.15: Merge together data frames into a VERY wide database...lots of columns!
# Merge the dataframes, adding a year in each step. All=TRUE is needed if # of places is different.
#
# (R-language newbie script...There are probably more terse/exotic ways of doing this!)
place0506 <- merge(place05, place06, by = c('GEOID','NAME'), all=TRUE)
place0507 <- merge(place0506,place07, by = c('GEOID','NAME'), all=TRUE)
place0508 <- merge(place0507,place08, by = c('GEOID','NAME'), all=TRUE)
place0509 <- merge(place0508,place09, by = c('GEOID','NAME'), all=TRUE)
place0510 <- merge(place0509,place10, by = c('GEOID','NAME'), all=TRUE)
place0511 <- merge(place0510,place11, by = c('GEOID','NAME'), all=TRUE)
place0512 <- merge(place0511,place12, by = c('GEOID','NAME'), all=TRUE)
place0513 <- merge(place0512,place13, by = c('GEOID','NAME'), all=TRUE)
place0514 <- merge(place0513,place14, by = c('GEOID','NAME'), all=TRUE)
place0515 <- merge(place0514,place15, by = c('GEOID','NAME'), all=TRUE)
place0516 <- merge(place0515,place16, by = c('GEOID','NAME'), all=TRUE)
place0517 <- merge(place0516,place17, by = c('GEOID','NAME'), all=TRUE)
place0518 <- merge(place0517,place18, by = c('GEOID','NAME'), all=TRUE)
place_all <- place0518
View(place_all)
# The following functions output useful lists to the R-studio console which can then be edited
names(place_all)
dput(names(place_all)) # most useful for subsetting variables
# The purpose here is to re-order and select variables into a much more compact
# database, for eventual exporting into a CSV file, and then into Excel for finishing touches.
selvars <- c("GEOID", "NAME",
"TotalPop_05_E", "TotalPop_06_E", "TotalPop_07_E", "TotalPop_08_E",
"TotalPop_09_E", "TotalPop_10_E", "TotalPop_11_E", "TotalPop_12_E",
"TotalPop_13_E", "TotalPop_14_E", "TotalPop_15_E", "TotalPop_16_E",
"TotalPop_17_E", "TotalPop_18_E")
# note the brackets for outputing new data frame from previous data frame....
place_all2 <- place_all[selvars]
# View the Selected Variables Table
View(place_all2)
# Set directory for exported data files, MacOS directory style
setwd("~/Desktop/tidycensus_work/output")
# Export the data frames to CSV files, for importing to Excel, and applying finishing touches
write.csv(place_all2,"ACS_AllYears_TotalPop_Calif_Places.csv")
write.csv(place_all, "ACS_AllYears_BaseVar_Calif_Places.csv")
#####################################################################################
# Example 2.16: Merge in a file of Large San Francisco Bay Area places, and subset file.
# Read in a file with the Large SF Bay Area Places, > 65,000 population
# and merge with the All Large California Places
bayplace <- read.csv("BayArea_Places_65K.csv")
Bayplace1 <- merge(bayplace,place_all, by = c('NAME'))
Bayplace2 <- merge(bayplace,place_all2, by = c('NAME'))
write.csv(Bayplace1,"ACS_AllYears_BaseVar_BayArea_Places.csv")
write.csv(Bayplace2,"ACS_AllYears_TotalPop_BayArea_Places.csv")
#####################################################################################