This repository has been archived by the owner on Oct 12, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpull_assessments.R
111 lines (96 loc) · 5.36 KB
/
pull_assessments.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# This script uses the EPA public API to collect and clean data on
# Integrated Reporting (IR) to the EPA under the Clean Water Act
# Sections 303(d), 305(b) and 314. The primary cleaning operation is transforming
# the data from .json format to .csv, in addition to selecting only certain
# features of interest.
#
# Note: this script only pulls assessments which resulted in a use support
# determination of 'Not Supporting' (use attainment code 'N').
#
# Related script: `pull_actions.R`.
#
# Author: Ryan Treves
# Updated: 09/09/22
library(jsonlite)
library(tidyr)
library(dplyr)
library(plyr)
# Two-letter codes of the jurisdictions to query, in processing order.
# 'VI' = Virgin Islands, 'PR' = Puerto Rico.
# NOTE(review): 'DC' is not in this list -- confirm that is intentional.
states <- c(
  "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
  "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "MA", "MD",
  "ME", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
  "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "PR", "RI",
  "SC", "SD", "TN", "TX", "UT", "VT", "VA", "VI", "WA", "WV",
  "WI", "WY"
)
# Main driver: for every state and biennial reporting cycle, download the
# 'Not Supporting' assessments from the ATTAINS public API, flatten the nested
# JSON into one row per use attainment / parameter / associated action, keep
# only the variables of interest, and write one CSV per state plus one
# combined CSV for all states.

# Accumulates the cleaned rows of every state and reporting cycle.
data <- tibble()
for (state in states) {
  # Rows for the current state across all reporting cycles.
  state_data <- tibble()
  # Biennial CWA 305(b) assessments started in 2002
  for (year in c("2002", "2004", "2006", "2008", "2010", "2012", "2014",
                 "2016", "2018", "2020", "2022")) {
    state_year_data <- tibble()
    # We want assessments that resulted in a use support determination of
    # 'Not Supporting' (useSupport=N) for this state and reporting cycle.
    raw <- fromJSON(paste0(
      "https://attains.epa.gov/attains-public/api/assessments?useSupport=N&state=",
      state, "&reportingCycle=", year
    ))
    # Only process the response when it reports a non-zero number of results
    # for the given reporting cycle and state.
    if (raw$count != 0) {
      # We're interested in information that is encoded into nested
      # dataframes, so we unnest them level by level. keep_empty = TRUE keeps
      # rows whose nested element is empty instead of dropping them.
      state_year_data <- raw$items %>%
        unnest(assessments, names_repair = "universal") %>%
        unnest(useAttainments, names_repair = "universal", keep_empty = TRUE) %>%
        unnest(parameters, names_repair = "universal", keep_empty = TRUE) %>%
        # Keep only the use attainments coded 'N'; rows with a missing code
        # are dropped by filter() as well.
        filter(useAttainmentCode == "N")

      # In addition, some nested dataframes are mostly unneeded information,
      # so we extract the variables of interest directly. Each extraction is
      # guarded because the nested structure is not present in every response;
      # when it is missing the derived column is filled with NA.
      has_assessment_activity <-
        ("assessmentMetadata" %in% colnames(state_year_data)) &&
        (typeof(state_year_data$assessmentMetadata) == "list") &&
        (typeof(state_year_data$assessmentMetadata$assessmentActivity) == "list")
      if (has_assessment_activity) {
        state_year_data$assessment_date <-
          state_year_data$assessmentMetadata$assessmentActivity$assessmentDate
      } else {
        state_year_data["assessment_date"] <- NA
      }

      # Checking for the column first avoids the tibble warning that `$`
      # raises on an unknown column.
      has_listing_information <-
        ("impairedWatersInformation" %in% colnames(state_year_data)) &&
        (typeof(state_year_data$impairedWatersInformation) == "list") &&
        (typeof(state_year_data$impairedWatersInformation$listingInformation) == "list")
      if (has_listing_information) {
        listing_information <-
          state_year_data$impairedWatersInformation$listingInformation
        state_year_data$cycle_first_listed <-
          listing_information$cycleFirstListedText
        state_year_data$cycle_scheduled_for_TMDL <-
          listing_information$cycleScheduledForTMDLText
      } else {
        state_year_data["cycle_first_listed"] <- NA
        state_year_data["cycle_scheduled_for_TMDL"] <- NA
      }

      # Collect information on associated actions
      if ("associatedActions" %in% colnames(state_year_data)) {
        # lapply() (not sapply()) is required here: sapply() simplifies its
        # result when every element has the same length, which collapses the
        # per-row data frames so that unnest() no longer produces the
        # associatedActionIdentifier column. lapply() always returns a list
        # of data frames.
        state_year_data$associatedActions <-
          lapply(state_year_data$associatedActions, as.data.frame)
        state_year_data <- unnest(state_year_data, associatedActions,
                                  names_repair = "universal",
                                  keep_empty = TRUE)
      }
      # Guarantee the column exists, whether associatedActions was absent or
      # unnesting it did not yield an identifier.
      if (!("associatedActionIdentifier" %in% colnames(state_year_data))) {
        state_year_data["associatedActionIdentifier"] <- NA
      }

      # Create a state variable for convenience
      state_year_data$state_code <- state

      # Select down to variables of interest; any_of() tolerates columns that
      # are absent from this particular response.
      columns_of_interest <- c(
        "state_code",
        "organizationIdentifier",
        "organizationTypeText",
        "reportingCycleText",
        "assessmentUnitIdentifier",
        "useName",
        "useAttainmentCode",
        "epaIRCategory",
        "associatedActionIdentifier",
        "threatenedIndicator",
        "parameterStatusName",
        "parameterName",
        "cycle_first_listed",
        "cycleLastAssessedText",
        "cycle_scheduled_for_TMDL",
        "assessment_date"
      )
      state_year_data <- select(state_year_data, any_of(columns_of_interest))

      # rbind.fill() pads columns missing from either side with NA.
      state_data <- plyr::rbind.fill(state_data, state_year_data)
    }
  }
  # One CSV per state, written to the current working directory.
  write.csv(state_data, paste0(state, "_NotSupporting_assessments.csv"))
  data <- plyr::rbind.fill(data, state_data)
  print(paste(state, "done"))
}
# Combined CSV covering every state processed above.
write.csv(data, "all_NotSupporting_assessments.csv")