diff --git a/FinaliseTrend.R b/FinaliseTrend.R index 2544f11..1df77ed 100644 --- a/FinaliseTrend.R +++ b/FinaliseTrend.R @@ -46,24 +46,24 @@ EndYear <- 2016 # Trend pass/fail logs vm<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_monthly_",StartYear,"-",EndYear,".csv",sep="")) -vb<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) +#vb<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) vq<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_quarterly_",StartYear,"-",EndYear,".csv",sep="")) # Trend results m<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_monthly_",StartYear,"-",EndYear,".csv",sep="")) -b<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) +#b<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) q<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_quarterly_",StartYear,"-",EndYear,".csv",sep="")) m$m<-12 -b$m<-6 +#b$m<-6 q$m<-4 # Append Dataframes -vmbq5 <- rbind.data.frame(vm,vb,vq) +#vmbq5 <- rbind.data.frame(vm,vb,vq) vmbq5 <- rbind.data.frame(vm,vq) vmbq5$period<-5 -mbq5 <- rbind.data.frame(m,b,q) +#mbq5 <- rbind.data.frame(m,b,q) # Dropping bimonthly from finalised trend - NIWA recommendation mbq5 <- rbind.data.frame(m,q) mbq5$period<-5 @@ -76,27 +76,27 @@ StartYear <- 2007 EndYear <- 2016 vm<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_monthly_",StartYear,"-",EndYear,".csv",sep="")) -vb<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) +#vb<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) vq<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_quarterly_",StartYear,"-",EndYear,".csv",sep="")) m<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_monthly_",StartYear,"-",EndYear,".csv",sep="")) -b<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) +#b<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) q<-read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_quarterly_",StartYear,"-",EndYear,".csv",sep="")) m$m<-12 -b$m<-6 +#b$m<-6 q$m<-4 # Append Dataframes -vmbq9 <- rbind.data.frame(vm,vb,vq) +#vmbq9 <- rbind.data.frame(vm,vb,vq) vmbq9 <- rbind.data.frame(vm,vq) vmbq9$period<-10 -mbq9 <- rbind.data.frame(m,b,q) +#mbq9 <- rbind.data.frame(m,b,q) # Dropping bimonthly from finalised trend - NIWA recommendation mbq9 <- rbind.data.frame(m,q) mbq9$period<-10 -rm(m,b,q,vm,vb,vq) +rm(m,q,vm,vq) mbq <- rbind.data.frame(mbq5,mbq9) vmbq <- rbind.data.frame(vmbq5,vmbq9) @@ -107,9 +107,33 @@ vmbq <- rbind.data.frame(vmbq5,vmbq9) # ======================== # Load exclusion file -exclusions <- read.csv(file="//file/herman/R/OA/08/02/2017/Water Quality/R/lawa_state/TrendExclusion2016.csv",stringsAsFactors=FALSE) +exclusions <- read.csv(file="//file/herman/R/OA/08/02/2017/Water Quality/R/lawa_state/TrendExclusion2017.csv",stringsAsFactors=FALSE) + +## BEGIN CODE ADDITION ID==CawthronSwitchTrendOutput ------------ +## ID: CawthronSwitchTrendOutput +## DATE: 2018-02-14 +## BY: Sean Hodges +## Horizons Regional Council +## DESC: Councils may choose not to have trend data presented on LAWA. The LAWA code +## uses an exclusion file to drop data from agencies, regions and sites at +## each Councils request. This code addition introduces a switch to turn off +## exclusions based on regions. +## OPTIONAL STEP +## Created to address request from Cawthron, and endorsed by EMAR Project Manager, +## for all trend data from all councils, irrespective of Taranaki's exclusion +## +## Create a variable to control execution of this additional code. +## This would best be handled by a config or ini file for this code, but for +## time-being, we'll hardcode directly in this script. +## OPTIONAL STEP STATUS +CawthronSwitchTrendOutput <- TRUE + +if(CawthronSwitchTrendOutput==TRUE){ +exclusions <- exclusions[exclusions$Type!="Region",] +} +## END CODE ADDITION -# Mark records for exclusion +# Mark records for exclusion------------ mbq$Exclusion <- FALSE xagency<-exclusions[exclusions$Type=="Agency",] xregion<-exclusions[exclusions$Type=="Region",] @@ -143,47 +167,101 @@ cat("Dropping records from trends as requested by Councils:\n",sum(mbq$Exclusion mbq <- mbq[mbq$Exclusion==FALSE,] # <-- only retain those records where the exclusion column contains a FALSE value. # ======================== - +# Significant results now a deprecated approach based on Larned et al (2015) # Flag significant results -mbq$p.sig <- ifelse(mbq$p.value<=0.05,1,0) +#mbq$p.sig <- ifelse(mbq$p.value<=0.05,1,0) # Summary for significant results at a catchment level -s <- summaryBy(p.sig~period+Catchment+Parameter+m, +# s <- summaryBy(p.sig~period+Catchment+Parameter+m, +# data=mbq, +# FUN=c(sum), na.rm=TRUE, keep.name=TRUE) +# t <- summaryBy(p.sig~period+Catchment+Parameter+m, +# data=mbq, +# FUN=c(length), keep.name=TRUE) +# u <- summaryBy(Sen.Pct~period+Catchment+Parameter+m, +# data=mbq, +# FUN=c(mean), keep.name=TRUE) + +# Flag results outside the 100(1–2α)% confidence interval for trend direction-testing +mbq$trend.flag <- ifelse(mbq$zeroLocationCL90,0,1) + +# For catchments, this summaryBy statement sums up how many +# sites have a trend direction (but does not indicate what direction) +# The trend.flag column is populate with 1's and 0's +s <- summaryBy(trend.flag~period+Catchment+Parameter+m, data=mbq, FUN=c(sum), na.rm=TRUE, keep.name=TRUE) -t <- summaryBy(p.sig~period+Catchment+Parameter+m, + +# For catchments, this summaryBy statement determines how many (vector length) +# site/measurement combos have had trend estimation applied. +t <- summaryBy(trend.flag~period+Catchment+Parameter+m, data=mbq, FUN=c(length), keep.name=TRUE) -u <- summaryBy(Sen.Pct~period+Catchment+Parameter+m, + +# For catchments, this summaryBy statement determines the median +# index value for -1, 0 or 1 trend scores ## CONFIRM APPROACH +# The sign of the resulting median will then give direction of catchment trend +u <- summaryBy(TrendScore~period+Catchment+Parameter+m, data=mbq, - FUN=c(mean), keep.name=TRUE) -s$count<-t$p.sig -s$p.flag <- s$p.sig/s$count -s$Sen.Pct <- u$Sen.Pct + FUN=c(median), keep.name=TRUE) + + +s$Samples<-t$trend.flag # How many sites have trends estimates applied in Catchment +s$Trend.Proportion <- s$trend.flag/s$Samples # proportion of site/parameter combos with trend direction in catchment +s$Direction <- u$TrendScore # sign of this value gives direction for trend in catchment v<-calcTrendScoreAggregate(s) -# Summary for significant results at a regional level -s <- summaryBy(p.sig~period+Region+Parameter+m, +# # Summary for significant results at a regional level +# s <- summaryBy(p.sig~period+Region+Parameter+m, +# data=mbq, +# FUN=c(sum), na.rm=TRUE, keep.name=TRUE) +# t <- summaryBy(p.sig~period+Region+Parameter+m, +# data=mbq, +# FUN=c(length), keep.name=TRUE) +# u <- summaryBy(Sen.Pct~period+Region+Parameter+m, +# data=mbq, +# FUN=c(mean), keep.name=TRUE) +# s$count<-t$p.sig +# s$p.flag <- s$p.sig/s$count +# s$Sen.Pct <- u$Sen.Pct + +# # Flag results outside the 100(1–2α)% confidence interval for trend direction-testing +# For regions, this summaryBy statement sums up how many +# sites have a trend direction (but does not indicate what direction) +# The trend.flag column is populate with 1's and 0's +s <- summaryBy(trend.flag~period+Region+Parameter+m, data=mbq, FUN=c(sum), na.rm=TRUE, keep.name=TRUE) -t <- summaryBy(p.sig~period+Region+Parameter+m, + +# For regions, this summaryBy statement determines how many (vector length) +# site/measurement combos have had trend estimation applied. +t <- summaryBy(trend.flag~period+Region+Parameter+m, data=mbq, FUN=c(length), keep.name=TRUE) -u <- summaryBy(Sen.Pct~period+Region+Parameter+m, + +# For regions, this summaryBy statement determines the median +# index value for -1, 0 or 1 trend scores ## CONFIRM APPROACH +# The sign of the resulting median will then give direction of catchment trend +u <- summaryBy(TrendScore~period+Region+Parameter+m, data=mbq, - FUN=c(mean), keep.name=TRUE) -s$count<-t$p.sig -s$p.flag <- s$p.sig/s$count -s$Sen.Pct <- u$Sen.Pct + FUN=c(median), keep.name=TRUE) + + +s$Samples<-t$trend.flag # How many sites have trends estimates applied in Region +s$Trend.Proportion <- s$trend.flag/s$Samples # proportion of site/parameter combos with trend direction in Region +s$Direction <- u$TrendScore # sign of this value gives direction for trend in Region w<-calcTrendScoreAggregate(s) + w$Catchment <- "" v$Region <- "" z<-rbind.data.frame(v,w) # OUTPUT data to CSVs write.csv(vmbq,file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_combined.csv") ## LOG PASS-FAIL for Trend analysis +write.csv(vmbq,file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/SitesMeetingTrendCriteria.csv") ## LOG PASS-FAIL for Trend analysis ## A more sensible name to use when sharing this file + rm(vmbq,vmbq5,vmbq9) write.csv(mbq,file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_sites_freq_combined.csv") ## COMBINED TREND DATA FOR 5 AND 10 YEARS write.csv(z,file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_catchment_region.csv") ## @@ -194,18 +272,20 @@ write.csv(z,file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_catc mbq$Location <- mbq$LAWAID z$Location <- paste(z$Region,as.character(z$Catchment),sep="") -#c <- mbq[c(2,3,8,26,27,12,13,16,15)] # This vector needs editing if site table changes. -#c <- mbq[c(2,3,15,16,8,38,18,12,39)] # This vector needs editing if site table changes. -#c <- mbq[c(2,3,15,16,8,36,18,12,37)] # This vector needs editing if site table changes. -#c <- mbq[c(2,3,15,16,8,60,18,12,61)] +## Adding in applied quarterly/monthly frequency data +#c <- mbq[c(2,3,8,27,28,7,13,14,17,16)] -#c <- mbq[c(2,3,8,26,27,13,14,17,16)] # This vector needs editing if site table changes. +c <- mbq[c(36,3,12,32,33,11,18,19,22,21)] -c <- mbq[c(2,3,8,27,28,13,14,17,16)] -names(c) <- c("Location","Parameter","TrendScore","m","period","Altitude","Landuse","Region","Frequency") +names(c) <- c("Location","Parameter","TrendScore","m","period","Freq", + "Altitude","Landuse","Region","Frequency") #names(c) <- c("Location","Parameter","Altitude","Landuse","TrendScore","m","Frequency","Region","period") -d <- z[c(11,3,9,4,1)] # This vector needs checking if fields added or removed. +# This vector needs checking if fields added or removed +d <- z[c(11,3,9,4,1)] ## Location,Parameter,TrendScore,m,period +names(d) <- c("Location","Parameter","TrendScore","m","period") + +d$Freq<-"" d$Altitude <- "ALL" d$Landuse <- "ALL" d$Region <- "" @@ -221,4 +301,5 @@ e <- rbind.data.frame(c,d) ## SAVE FINAL FILE SUMMARISING TREND RESULTS write.csv(e,file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_sites_catchment_region.csv") + setwd(od) \ No newline at end of file diff --git a/PackageDataForDelivery.R b/PackageDataForDelivery.R index 27e38ae..76c041c 100644 --- a/PackageDataForDelivery.R +++ b/PackageDataForDelivery.R @@ -33,9 +33,16 @@ trends<-read.csv(file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend # --------------------- # [2015-10-08] Remove DIN results - not required for LAWA upload - only there for River Awards & Morgan Foundation trends <- trends[trends$Parameter!="DIN",] +trends <- trends[trends$Parameter!="PH",] trends$X <- c(1:nrow(trends)) # updating row counter in data - only needed for debugging code # --------------------- +# [2017-09-07] Sort by Location,Parameter, m, period and keep Max values for 5 and 10 year months (var=m) +trend_1 <- aggregate(x=trends,by=list(trends$Location,trends$Parameter,trends$period),FUN=max)[,c(4:(4+length(trends)-1))] + + +# --------------------- +# 10 year TRENDS # [2015-10-08] Load data to determine frequency's by region, catchment and site and retain those data that match StartYear <- 2007 EndYear <- 2016 @@ -112,12 +119,20 @@ df_minfreq_Catchment <- df_minfreq_Catchment[,c(2,3,4,1,5,6,7)] # Converting location column to character df_minfreq_Catchment$Location <- as.character(df_minfreq_Catchment$Location) +# # Parameter +# df_minfreq_Parameter$Frequency<-"Quarterly" +# df_minfreq_Parameter$Frequency[df_minfreq_Parameter$Value>=10]<-"Monthly" +# # recode value here to 12 or 4 as well +# df_minfreq_Parameter$Value[df_minfreq_Parameter$Value>=10]<-12 +# df_minfreq_Parameter$Value[df_minfreq_Parameter$Value<10]<-4 +# + # Parameter df_minfreq_Parameter$Frequency<-"Quarterly" -df_minfreq_Parameter$Frequency[df_minfreq_Parameter$Value>=10]<-"Monthly" +df_minfreq_Parameter$Frequency[df_minfreq_Parameter$Value>=12]<-"Monthly" # recode value here to 12 or 4 as well -df_minfreq_Parameter$Value[df_minfreq_Parameter$Value>=10]<-12 -df_minfreq_Parameter$Value[df_minfreq_Parameter$Value<10]<-4 +df_minfreq_Parameter$Value[df_minfreq_Parameter$Value>=12]<-12 +df_minfreq_Parameter$Value[df_minfreq_Parameter$Value<12]<-4 df_minfreq_Parameter <- df_minfreq_Parameter[is.na(df_minfreq_Parameter$Region)==FALSE,] @@ -143,12 +158,12 @@ names(df_minfreq) <- n #issue with join below -tmp <- merge(trends,df_minfreq,by=c("Location","Parameter"),all.x=TRUE) +tmp <- merge(trend_1,df_minfreq,by=c("Location","Parameter"),all.x=TRUE) # [2015-10-14] Joining data to select min frequency matches #tmp<-dplyr::left_join(trends,df_minfreq,by=c("Location","Parameter")) -tmp1<-tmp[tmp$m==tmp$Value,] - +#tmp1<-tmp[tmp$m==tmp$Value,] +tmp1<-tmp[,c(1:6,8:16)] # --------------------- @@ -164,14 +179,159 @@ tmp1<-tmp1[,c(1,2,7,8,4,10,9,6)] names(tmp1) <-c("Location", "Parameter", "Altitude", "Landuse", "TrendScore", "Frequency", "Region", "period") # remove any NA rows tmp1 <- tmp1[!is.na(tmp1$Location),] +tmp1 <- tmp1[tmp1$period==10,] + +# --------------------- +# 5 year TRENDS +# [2015-10-08] Load data to determine frequency's by region, catchment and site and retain those data that match +StartYear <- 2012 +EndYear <- 2016 +StartMonth <- 1 +EndMonth <- 12 +load(file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trenddata2012-2016.RData") + +lawadata <- samplesByInterval(StartYear,EndYear,StartMonth,EndMonth,lawadata) + +# Determine counts by site/parameter/year +df_minfreq_length <- summaryBy(Value~Region+Catchment+SiteName+parameter+year,data=lawadata, + id=~LAWAID+FrequencyAll, + FUN=c(length), keep.name=TRUE) + +# Determine annual medians counts by site/parameter +df_minfreq_Parameter <- summaryBy(Value~Region+Catchment+SiteName+parameter,data=df_minfreq_length, + id=~LAWAID, + FUN=c(median), keep.name=TRUE) + +# # Determine annual medians counts by site +# df_minfreq_Site <- summaryBy(Value~Region+Catchment+SiteName,data=df_minfreq_length, +# id=~LAWAID, +# FUN=c(median), keep.name=TRUE) + +# Determine annual medians counts by catchment +df_minfreq_Catchment <- summaryBy(Value~Region+Catchment+parameter,data=df_minfreq_length, + FUN=c(median), keep.name=TRUE) + +# Determine annual medians counts by region +df_minfreq_Region <- summaryBy(Value~Region+parameter,data=df_minfreq_length, + FUN=c(median), keep.name=TRUE) + +# Determining minimum frequency's +# Region +df_minfreq_Region$Frequency<-"Quarterly" +df_minfreq_Region$Frequency[df_minfreq_Region$Value==12]<-"Monthly" +# recode value here to 12 or 4 as well +df_minfreq_Region$Value[df_minfreq_Region$Value!=12]<-4 + +df_minfreq_Region <- df_minfreq_Region[is.na(df_minfreq_Region$Region)==FALSE,] +n <- names(df_minfreq_Region) +n[1]<-"Location" +names(df_minfreq_Region)<-n + +df_minfreq_Region$Region<-df_minfreq_Region$Location +df_minfreq_Region$Catchment<-"" +df_minfreq_Region$SiteName<-"" + +# Cross-join regions with parameters to have each parameter for each region +#df_minfreq_Region <- merge(x = df_minfreq_Region, y = wqparam, by = NULL) + + +# Catchment +df_minfreq_Catchment$Frequency<-"Quarterly" +df_minfreq_Catchment$Frequency[df_minfreq_Catchment$Value>=10]<-"Monthly" +# recode value here to 12 or 4 as well +df_minfreq_Catchment$Value[df_minfreq_Catchment$Value>=10]<-12 +df_minfreq_Catchment$Value[df_minfreq_Catchment$Value<10]<-4 + +df_minfreq_Catchment <- df_minfreq_Catchment[is.na(df_minfreq_Catchment$Region)==FALSE,] +n <- names(df_minfreq_Catchment) +n[2] <-"Location" +names(df_minfreq_Catchment)<-n + +df_minfreq_Catchment$Catchment<-"" +df_minfreq_Catchment$SiteName<-"" + +# Cross-join regions with parameters to have each parameter for each region +#df_minfreq_Catchment <- merge(x = df_minfreq_Catchment, y = wqparam, by = NULL) + +# Reordering columns +df_minfreq_Catchment <- df_minfreq_Catchment[,c(2,3,4,1,5,6,7)] + +# Converting location column to character +df_minfreq_Catchment$Location <- as.character(df_minfreq_Catchment$Location) + +# # Parameter +# df_minfreq_Parameter$Frequency<-"Quarterly" +# df_minfreq_Parameter$Frequency[df_minfreq_Parameter$Value>=10]<-"Monthly" +# # recode value here to 12 or 4 as well +# df_minfreq_Parameter$Value[df_minfreq_Parameter$Value>=10]<-12 +# df_minfreq_Parameter$Value[df_minfreq_Parameter$Value<10]<-4 + +# Parameter - this section updated 2017-09-04 to be more restrictive - changing criteria from 10 to 12. +# Justification: Site/parameter combos need 60 samples over 5 years - setting value to 10 results in +# having too many sites excluded as they do not meeting this criteria when 10 or 11 samples per year. +df_minfreq_Parameter$Frequency<-"Quarterly" +df_minfreq_Parameter$Frequency[df_minfreq_Parameter$Value>=12]<-"Monthly" +# recode value here to 12 or 4 as well +df_minfreq_Parameter$Value[df_minfreq_Parameter$Value>=12]<-12 +df_minfreq_Parameter$Value[df_minfreq_Parameter$Value<12]<-4 + + + +df_minfreq_Parameter <- df_minfreq_Parameter[is.na(df_minfreq_Parameter$Region)==FALSE,] +n <- names(df_minfreq_Parameter) +n[4]<-"parameter" +n[6]<-"Location" +names(df_minfreq_Parameter) <- n + +# [2015-10-14] Reordering columns +df_minfreq_Parameter <- df_minfreq_Parameter[,c(6,5,7,1:4)] + + +df_minfreq_Parameter <- df_minfreq_Parameter[,c(1,7,2,3:6)] +df_minfreq_Catchment <- df_minfreq_Catchment[,c(1:3,5,4,6:7)] + + +# [2015-10-14] Binding tables together in order to join results to TRENDS table +df_minfreq <- rbind.data.frame(df_minfreq_Region,df_minfreq_Catchment,df_minfreq_Parameter) + +n <- names(df_minfreq) +n[2]<-"Parameter" +names(df_minfreq) <- n + + +#issue with join below +tmp <- merge(trend_1,df_minfreq,by=c("Location","Parameter"),all.x=TRUE) + +# [2015-10-14] Joining data to select min frequency matches +#tmp<-dplyr::left_join(trends,df_minfreq,by=c("Location","Parameter")) +#tmp2<-tmp[tmp$m==tmp$Value,] +tmp2<-tmp[,c(1:6,8:16)] + + +# --------------------- +# [2015-10-14] Drop columns +# Columns to keep +# Location Parameter Altitude Landuse TrendScore Frequency Region period +# ARC-00014 DRP Lowland Urban -2 Monthly Auckland 5 +# ARC-00014 ECOLI Lowland Urban 0 Monthly Auckland 5 +# ARC-00014 TN Lowland Urban 0 Monthly Auckland 5 + + +tmp2<-tmp2[,c(1,2,7,8,4,10,9,6)] +names(tmp2) <-c("Location", "Parameter", "Altitude", "Landuse", "TrendScore", "Frequency", "Region", "period") +# remove any NA rows +tmp2 <- tmp2[!is.na(tmp2$Location),] +tmp2 <- tmp2[tmp2$period==5,] + +tmp0 <- rbind.data.frame(tmp1,tmp2) # --------------------- # Export to XL workbook -write.csv(tmp1,file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_fordelivery.csv") ## +write.csv(tmp0,file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_fordelivery.csv") ## # --------------------- # Housekeeping -rm(trends,tmp,tmp1,df_minfreq,df_minfreq_Region,df_minfreq_Parameter,df_minfreq_Catchment,df_minfreq_length,lawadata,n,wqparam) +rm(trends,trend_1,tmp,tmp1,df_minfreq,df_minfreq_Region,df_minfreq_Parameter,df_minfreq_Catchment,df_minfreq_length,lawadata,n,wqparam) # ====================== @@ -350,8 +510,12 @@ write.csv(state,file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/state_ # Load 10 year data pull - data.frame "lawadata" +# This is the 10 year data set with any censoring applied to values load("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trenddata2007-2016.RData",verbose=TRUE) -raw<-lawadata[,c(1,16,5,2,13,4,7,15,22,19,18,28,23)] + +raw<-lawadata[,c(1,14,13,5,2,3,4,7,21,20,16,17)] +# [1] "SiteName" "LAWAID" "parameter" "Date" "Value" "Method" "CenType +# [8] "Agency" "Region" "AltitudeGroup" "LanduseGroup" #rm(lawadata) raw<-raw[!is.na(raw$LAWAID),] @@ -361,7 +525,7 @@ raw$Symbol<-"" raw$Symbol[raw$CenType=="Left"]<-"<" raw$Symbol[raw$CenType=="Right"]<-">" -raw$RawValue <- paste(raw$Symbol,raw$OriginalValue,sep="") +raw$RawValue <- paste(raw$Symbol,raw$Value,sep="") raw$QC <- "" raw$Agency[grepl(pattern = "^NIWA",x=raw$Agency )] <- "NIWA" raw$Agency[grepl(pattern = "^Christchurch",x=raw$Agency )] <- "Christchurch" @@ -373,10 +537,9 @@ raw$Disclaimer[raw$Agency=="Horizons"] <- "The enclosed information is suppli However, as we endeavour to continuously improve our products, we reserve the right to amend the data on which this information is based, where necessary and without notice, at any time." raw$Disclaimer[raw$Agency=="Waikato"] <- "Waikato Regional Council provides this information in good faith and has exercised all reasonable skill and care in controlling the content of this information, and accepts no liability in contract, tort or otherwise, for any loss, damage, injury or expense (whether direct, indirect or consequential) arising out of the provision of this information or its use by you." -raw<-raw[,c(1:6,8:19)] +raw<-raw[,c(1:7,9:18)] -c <- c("Site", "SiteID", "Measurement", "DateTime", "Original.Value", "Method", "LAWAID", "Region", "Landuse", "Altitude", "Catchment", - "Agency", "Symbol","Raw.Value","QC", "License", "Ownership", "Disclaimer") +c <- c("Site", "SiteName","LAWAID", "Measurement", "DateTime", "Original.Value", "Method", "Agency", "Region", "Altitude", "Landuse", "Symbol","Raw.Value","QC", "License", "Ownership", "Disclaimer") names(raw) <- c diff --git a/lawa_10yrsDataPull.R b/lawa_10yrsDataPull.R index c03aae0..2c76c25 100644 --- a/lawa_10yrsDataPull.R +++ b/lawa_10yrsDataPull.R @@ -40,6 +40,14 @@ EndYear <- 2016 #/* -===Include required function libraries===- */ source("scripts/WQualityStateTrend/lawa_state_functions.R") +HilltopLibrary<-FALSE +library(Hilltop) +HilltopLibrary<-TRUE +#-- Specifying source files - note that these connections are pulling from SOE folder on HilltopDEV + +if(HilltopLibrary==TRUE){ + lawa <- Hilltop::HilltopData("//hilltopdev/data/lawa2017/state/lawa_provisional_2017.dsn") +} #/* -===Global variable/constant definitions===- */ vendor <- c("52NORTH","AQUATIC","HILLTOP","KISTERS") @@ -71,7 +79,7 @@ hts <- c("service=Hilltop", # Site data request #l <- SiteTable(databasePathFileName="//ares/waterquality/LAWA/2013/hilltop.mdb",sqlID=2) ## Assumes all sites have hilltop.mdb site names -requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") +if(HilltopLibrary!=TRUE) requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") # Replace database call here with call to previously loaded WFS Site data #l <- SiteTable(databasePathFileName="//file/herman/R/OA/08/02/2017/MASTER SiteList/lawa_2016.mdb",sqlID=3) ## Allows for different sitenames in hilltop.mdb - requires assessment and population of the database. @@ -94,18 +102,45 @@ cat("LAWA Water QUality TREND Analysis\n","Number of sites returned:",length(s)) #requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") for(i in 1:length(wqparam)){ - # Deprecated 18-Sep-2016 as censored data handled by functions - # supplied by Ton Snelder. - #tr <- subset(trendRules_csv,DefaultMeasurement==wqparam[i] & Trend=="5years" & Rule=="Halve non detect" & UsedInLAWA==TRUE) - requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") + ## Added Hilltop library 2017-09-07 + if(HilltopLibrary!=TRUE){ + requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") + } cat("Starting",wqparam[i],"\n") - r <- readUrl(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) - #r <- requestData(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) - wqdata <- MeasurementList(xmlmdata=r,requestType="Hilltop") - wqdata$Value <- as.character(wqdata$Value) - wqdata$parameter <- wqparam[i] - + if(HilltopLibrary==TRUE){ + lawa_collection <- GetCollection.HilltopData(lawa, "//hilltopdev/c$/HilltopServer/LAWA_collections.xml",paste("LAWA",wqparam[i],sep="_")) + mySites<-unique(lawa_collection[,1]) + myMeas<-unique(lawa_collection[,2]) + for(ii in 1:length(mySites)){ + dx <- try(GetData(lawa,mySites[ii], myMeas, startTime=paste(StartYear,"-01-01",sep=""), endTime=paste(EndYear + 1,"-01-01",sep=""), WQParams=FALSE),silent=TRUE) + if(attr(dx,"class")!="try-error"){ + if(ii==1){ + x1 <- unlist(attr(dx,"dimnames")) + x1df <- data.frame(index(dx),as.character(coredata(dx)),stringsAsFactors = FALSE) + x1df$SiteName<-mySites[ii];x1df$parameter<-wqparam[i];x1df$Method<-"" + x1df <- x1df[,c(3,1,2,5,4)] + names(x1df) <- c("SiteName" , "Date" , "Value" , "Method" , "parameter") + wqdata <- x1df + } else { + x1 <- unlist(attr(dx,"dimnames")) + x1df <- data.frame(index(dx),as.character(coredata(dx)),stringsAsFactors = FALSE) + x1df$SiteName<-mySites[ii];x1df$parameter<-wqparam[i];x1df$Method<-"" + x1df <- x1df[,c(3,1,2,5,4)] + names(x1df) <- c("SiteName" , "Date" , "Value" , "Method" , "parameter") + wqdata <- rbind.data.frame(wqdata,x1df,stringsAsFactors = FALSE) + } + rm(x1,x1df) + } + } + + } else { + r <- readUrl(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) + #r <- requestData(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) + wqdata <- MeasurementList(xmlmdata=r,requestType="Hilltop") + wqdata$Value <- as.character(wqdata$Value) + wqdata$parameter <- wqparam[i] + } # ------------------------ # Handling censored data # ------------------------ @@ -142,6 +177,10 @@ setwd(wd) r <- unique(lawadata$Region) +# disconnect from Hilltop object +Hilltop::disconnect(lawa) + + for(i in 1:length(r)){ write.csv(lawadata[lawadata$Region==r[i],],paste(r[i],".csv",sep=""),row.names = FALSE) } diff --git a/lawa_dataForGraphingOnLaWA.R b/lawa_dataForGraphingOnLaWA.R new file mode 100644 index 0000000..a68b28c --- /dev/null +++ b/lawa_dataForGraphingOnLaWA.R @@ -0,0 +1,38 @@ +#Import rdata set of 10 years of data +load("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/RC-Supplied-Data2007-2016.RData") + +# columns: LAWAID, SiteName, Measurement, Date, Date, Censored, Censored Type, Value, Region +raw_data <- lawadata[,c(9,1,5,2,2,6,7,3,16)] + +# Reorganising table +raw_data$Symbol[raw_data$CenType=="Left"] <- "<" +raw_data$Symbol[raw_data$CenType=="Right"] <- ">" +raw_data$Raw_Value <- raw_data$Value +raw_data$Raw_Value[!is.na(raw_data$Symbol)] <- paste(raw_data$Symbol[!is.na(raw_data$Symbol)],raw_data$Value[!is.na(raw_data$Symbol)],sep="") +raw_data <- raw_data[,c(1:5,11,10,8,9)] +names(raw_data) <- c("LAWAID","Site","Measurement","DateTime","Date","Raw Value","Symbol","Value","Region") + +#The export below will include all sites imported from Councils, irrespective of whether they +#have lawa id's or not. +#Export table +write.csv(raw_data,"//file/herman/R/OA/08/02/2017/Water Quality/ROutput/RiverWQ_GraphData.csv") + +#To write this table out, excluding sites without lawa ids, uncomment the following line. +write.csv(raw_data[!is.na(raw_data$LAWAID),],"//file/herman/R/OA/08/02/2017/Water Quality/ROutput/RiverWQ_GraphData_LAWASitesOnly.csv") + + +# #---------------------------------------------------------------------------------------------------------- +# # Filter data to remove NIWA +# raw_data <- lawadata[!grepl("NRWQN",lawadata$LAWAID,ignore.case = TRUE),c(1,5,2,2,6,7,13)] +# sitesToDrop <- c("Manawatu at Teachers Coll.","Manwatu at Weber Rd","Manawatu at Opiki Br.") # NIWA Sites not identified by NRWQN ids +# for(i in length(sitesToDrop)){ +# raw_data <- raw_data[raw_data$Site!=sitesToDrop[i],] +# } +# raw_data$Symbol[raw_data$CenType=="Left"] <- "<" +# raw_data$Symbol[raw_data$CenType=="Right"] <- ">" +# raw_data$Raw_Value <- raw_data$OriginalValue +# raw_data$Raw_Value[!is.na(raw_data$Symbol)] <- paste(raw_data$Symbol[!is.na(raw_data$Symbol)],raw_data$OriginalValue[!is.na(raw_data$Symbol)],sep="") +# raw_data <- raw_data[,c(1:4,9,8,7)] +# names(raw_data) <- c("Site","Measurement","DataTime","Date","Raw Value","Symbol","Value") +# +# #rm(lawadata,raw_data) diff --git a/lawa_dataPrep_WFS.R b/lawa_dataPrep_WFS.R index c594960..c7ac65b 100644 --- a/lawa_dataPrep_WFS.R +++ b/lawa_dataPrep_WFS.R @@ -92,6 +92,7 @@ cc <- function(file){ ## Load csv with WFS addresses urls2017 <- "//file/herman/R/OA/08/02/2017/Water Quality/R/lawa_state/CouncilWFS.csv" urls <- read.csv(urls2017,stringsAsFactors=FALSE) +urls$Agency[urls$Agency=="TDC"] <- "xTDC" ## Commenting out Tasman DC due to Hilltop Server issues #urls2016 <- "//file/herman/R/OA/08/02/2016/Water Quality/R/lawa_state/CouncilWFS.csv" #urls <- read.csv(urls2016,stringsAsFactors=FALSE) stopGapNames <- read.csv("agencyRegion.csv",stringsAsFactors=FALSE) diff --git a/lawa_load_swq.R b/lawa_load_swq.R index e33bc45..1d14796 100644 --- a/lawa_load_swq.R +++ b/lawa_load_swq.R @@ -20,24 +20,23 @@ try(shell(paste('mkdir "R:/2017/Water Quality/4.Analysis/"',format(Sys.Date(),"% ## import destination will be in folder with todays date (created above) importDestination <- paste("//file/herman/R/OA/08/02/2017/Water Quality/1.Imported/",format(Sys.Date(),"%Y-%m-%d"),"/",sep="") - -#Northland -source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadNRC.R") -rm("Data","df","df2","df2","sample","udf") - -#Auckland -source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadAC.R") -rm("Data","df","df2","df2","sample","udf") - -#Waikato -source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadWRC.R") -rm("Data","df","df2","df2","sample","udf") - -#Bay of Plenty -source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadBOP.R") -rm("Data","df","df2","df2","sample","udf") - -#Gisborne +# #Northland + source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadNRC.R") + rm("Data","df","df2","df2","sample","udf") +# +# #Auckland + source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadAC.R") + rm("Data","df","df2","df2","sample","udf") +# +# #Waikato + source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadWRC.R") + rm("Data","df","df2","df2","sample","udf") +# +# # #Bay of Plenty +# source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadBOP.R") +# rm("Data","df","df2","df2","sample","udf") + +# #Gisborne source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadGDC.R") rm("Data","df","df2","df2","sample","udf") @@ -49,9 +48,9 @@ rm("Data","df","df2","df2","sample","udf") source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadHBRC_v2.R") rm("Data","df","df2","df2","sample","udf") -#Horizons -source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadHRC.R") -rm("Data","df","df2","df2","sample","udf") +# #Horizons +# source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadHRC.R") +# rm("Data","df","df2","df2","sample","udf") #Greater Wellington source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadGW.R") @@ -61,9 +60,9 @@ rm("Data","df","df2","df2","sample","udf") source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadNCC.R") rm("Data","df","df2","df2","sample","udf") -#Tasman -source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadTDC.R") -rm("Data","df","df2","df2","sample","udf") +# #Tasman +# source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadTDC.R") +# rm("Data","df","df2","df2","sample","udf") #Marlborough source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadMDC.R") @@ -81,9 +80,9 @@ rm("Data","df","df2","df2","sample","udf") source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadES.R") rm("Data","df","df2","df2","sample","udf") -#NIWA -source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadNIWA.R") -rm("Data","df","df2","df2","sample","udf") +# #NIWA +# source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadNIWA.R") +# rm("Data","df","df2","df2","sample","udf") #West Coast source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/loadWCRC.R") diff --git a/lawa_state_5yr.R b/lawa_state_5yr.R index f4e3a7c..1839dbb 100644 --- a/lawa_state_5yr.R +++ b/lawa_state_5yr.R @@ -57,12 +57,22 @@ EndYear <- 2016 source("scripts/WQualityStateTrend/lawa_state_functions.R") +HilltopLibrary<-FALSE +library(Hilltop) +HilltopLibrary<-TRUE +#-- Specifying source files - note that these connections are pulling from SOE folder on HilltopDEV + +if(HilltopLibrary==TRUE){ + lawa <- Hilltop::HilltopData("//hilltopdev/data/lawa2017/state/lawa_provisional_2017.dsn") +} + #/* -===Global variable/constant definitions===- */ vendor <- c("52NORTH","AQUATIC","HILLTOP","KISTERS") #/* -===Local variable/constant definitions===- */ -wqparam <- c("BDISC","TURB","NH4","TON","TN","DRP","TP","ECOLI") +# Need to retain PH in this list for NOF Calculations later +wqparam <- c("BDISC","TURB","NH4","PH","TON","TN","DRP","TP","ECOLI") #wqparam <- c("BDISC") tss <- 3 # tss = time series server #tss_url <- "http://hilltopdev.horizons.govt.nz/lawa2014.hts?" @@ -86,8 +96,9 @@ hts <- c("service=Hilltop", # Site data request #l <- SiteTable(databasePathFileName="//ares/waterquality/LAWA/2013/hilltop.mdb",sqlID=2) ## Assumes all sites have hilltop.mdb site names -requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") - +if(HilltopLibrary==FALSE){ + requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") +} # Replace database call here with call to previously loaded WFS Site data #l <- SiteTable(databasePathFileName="//file/herman/R/OA/08/02/2016/MASTER SiteList/lawa_2016.mdb",sqlID=3) ## Allows for different sitenames in hilltop.mdb - requires assessment and population of the database. l <- read.csv("LAWA_Site_Table1.csv",stringsAsFactors=FALSE) @@ -96,9 +107,12 @@ l <- read.csv("LAWA_Site_Table1.csv",stringsAsFactors=FALSE) l$SWQLanduse[l$SWQLanduse=="Native"|l$SWQLanduse=="Exotic"|l$SWQLanduse=="Natural"] <- "Forest" -r <- requestData(vendor[tss],tss_url,request=paste(hts[1],hts[2],sep="")) -s <- SiteList(r) - +if(HilltopLibrary==TRUE){ + s <- Hilltop::SiteList(lawa) +} else { + r <- requestData(vendor[tss],tss_url,request=paste(hts[1],hts[2],sep="")) + s <- SiteList(r) +} cat("LAWA Water QUality State Analysis\n","Number of sites returned:",length(s)) @@ -114,19 +128,50 @@ cat("LAWA Water QUality State Analysis\n","Number of sites returned:",length(s)) # cat(wqparam[i]," sites returned: ",length(unique(wqdata$SiteName)),"\n",sep="") # } + # -=== WQ PARAMETERS ===- #requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") for(i in 1:length(wqparam)){ - - requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") +# for(i in 4:4){ + ## Added Hilltop library 2017-09-07 + if(HilltopLibrary==FALSE){ + requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") + } cat("Starting",wqparam[i],"\n") - r <- readUrl(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) - #r <- requestData(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) - wqdata <- MeasurementList(xmlmdata=r,requestType="Hilltop") - wqdata$Value <- as.character(wqdata$Value) - wqdata$parameter <- wqparam[i] - - + + if(HilltopLibrary==TRUE){ + lawa_collection <- GetCollection.HilltopData(lawa, "//hilltopdev/c$/HilltopServer/LAWA_collections.xml",paste("LAWA",wqparam[i],sep="_")) + mySites<-unique(lawa_collection[,1]) + myMeas<-unique(lawa_collection[,2]) + for(ii in 1:length(mySites)){ + dx <- try(GetData(lawa,mySites[ii], myMeas, startTime=paste(StartYear,"-01-01",sep=""), endTime=paste(EndYear + 1,"-01-01",sep=""), WQParams=FALSE),silent=TRUE) + if(attr(dx,"class")!="try-error"){ + if(!exists("wqdata")){ + x1 <- unlist(attr(dx,"dimnames")) + x1df <- data.frame(index(dx),as.character(coredata(dx)),stringsAsFactors = FALSE) + x1df$SiteName<-mySites[ii];x1df$parameter<-wqparam[i];x1df$Method<-"" + x1df <- x1df[,c(3,1,2,5,4)] + names(x1df) <- c("SiteName" , "Date" , "Value" , "Method" , "parameter") + wqdata <- x1df + } else { + x1 <- unlist(attr(dx,"dimnames")) + x1df <- data.frame(index(dx),as.character(coredata(dx)),stringsAsFactors = FALSE) + x1df$SiteName<-mySites[ii];x1df$parameter<-wqparam[i];x1df$Method<-"" + x1df <- x1df[,c(3,1,2,5,4)] + names(x1df) <- c("SiteName" , "Date" , "Value" , "Method" , "parameter") + wqdata <- rbind.data.frame(wqdata,x1df,stringsAsFactors = FALSE) + } + rm(x1,x1df) + } + } + + } else { + r <- readUrl(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) + #r <- requestData(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) + wqdata <- MeasurementList(xmlmdata=r,requestType="Hilltop") + wqdata$Value <- as.character(wqdata$Value) + wqdata$parameter <- wqparam[i] + } # ------------------------ # Handling censored data # ------------------------ @@ -281,13 +326,13 @@ for(i in 1:length(wqparam)){ # data=wqdata_A, # FUN=c(median), na.rm=TRUE, keep.name=TRUE) - ## using hazen method for median - quantile(prob=c(0.5),type=5) - wqdata_med <- summaryBy(Value~LAWAID+SiteName+parameter+dayDate, - id=~ID+Agency+Region+NZREACH+ - LanduseGroup+AltitudeGroup+Catchment+CatchLbl+Frequency+ - Comments+UsedInLAWA, - data=wqdata_A, - FUN=c(quantile), prob=c(0.5), type=5, na.rm=TRUE, keep.name=TRUE) + ## using hazen method for median - quantile(prob=c(0.5),type=5) + wqdata_med <- summaryBy(Value~LAWAID+SiteName+parameter+dayDate, + id=~ID+Agency+Region+NZREACH+ + LanduseGroup+AltitudeGroup+Catchment+CatchLbl+Frequency+ + Comments+UsedInLAWA, + data=wqdata_A, + FUN=c(quantile), prob=c(0.5), type=5, na.rm=TRUE, keep.name=TRUE) # Building dataframe to save at the end of this step if(i==1){ @@ -318,43 +363,48 @@ for(i in 1:length(wqparam)){ # - less than 30 samples for monthly samples # - less than 80 percent of samples for bimonthly/quarterly - - cat("LAWA Water Quality State Analysis\n",wqparam[i]) - - print(Sys.time() - x) - - cat("\nLAWA Water QUality State Analysis\nCalculating reference quartiles\n") - - state <- c("Site","Catchment","Region","NZ") - level <- c("LandUseAltitude","LandUse","Altitude","None") - - sa11 <- StateAnalysis(wqdata,state[1],level[1]) - - sa21 <- StateAnalysis(wqdata,state[2],level[1]) - sa22 <- StateAnalysis(wqdata,state[2],level[2]) - sa23 <- StateAnalysis(wqdata,state[2],level[3]) - sa24 <- StateAnalysis(wqdata,state[2],level[4]) - - sa31 <- StateAnalysis(wqdata,state[3],level[1]) - sa32 <- StateAnalysis(wqdata,state[3],level[2]) - sa33 <- StateAnalysis(wqdata,state[3],level[3]) - sa34 <- StateAnalysis(wqdata,state[3],level[4]) - - sa41 <- StateAnalysis(wqdata,state[4],level[1]) - sa42 <- StateAnalysis(wqdata,state[4],level[2]) - sa43 <- StateAnalysis(wqdata,state[4],level[3]) - sa44 <- StateAnalysis(wqdata,state[4],level[4]) - - cat("LAWA Water QUality State Analysis\n","Binding ",wqparam[i]," data together for measurement\n") - - if(i==1){ - sa <- rbind(sa11,sa21,sa22,sa23,sa24,sa31,sa32,sa33,sa34,sa41,sa42,sa43,sa44) - } else { - sa <- rbind(sa,sa11,sa21,sa22,sa23,sa24,sa31,sa32,sa33,sa34,sa41,sa42,sa43,sa44) + # Exclude PH + if(wqparam[i]!="PH0"){ + cat("LAWA Water Quality State Analysis\n",wqparam[i]) + + print(Sys.time() - x) + + cat("\nLAWA Water QUality State Analysis\nCalculating reference quartiles\n") + + state <- c("Site","Catchment","Region","NZ") + level <- c("LandUseAltitude","LandUse","Altitude","None") + + sa11 <- StateAnalysis(wqdata,state[1],level[1]) + + sa21 <- StateAnalysis(wqdata,state[2],level[1]) + sa22 <- StateAnalysis(wqdata,state[2],level[2]) + sa23 <- StateAnalysis(wqdata,state[2],level[3]) + sa24 <- StateAnalysis(wqdata,state[2],level[4]) + + sa31 <- StateAnalysis(wqdata,state[3],level[1]) + sa32 <- StateAnalysis(wqdata,state[3],level[2]) + sa33 <- StateAnalysis(wqdata,state[3],level[3]) + sa34 <- StateAnalysis(wqdata,state[3],level[4]) + + sa41 <- StateAnalysis(wqdata,state[4],level[1]) + sa42 <- StateAnalysis(wqdata,state[4],level[2]) + sa43 <- StateAnalysis(wqdata,state[4],level[3]) + sa44 <- StateAnalysis(wqdata,state[4],level[4]) + + cat("LAWA Water QUality State Analysis\n","Binding ",wqparam[i]," data together for measurement\n") + + if(i==1){ + sa <- rbind(sa11,sa21,sa22,sa23,sa24,sa31,sa32,sa33,sa34,sa41,sa42,sa43,sa44) + } else { + sa <- rbind(sa,sa11,sa21,sa22,sa23,sa24,sa31,sa32,sa33,sa34,sa41,sa42,sa43,sa44) + } } - + rm(wqdata) } +# disconnect from Hilltop object +Hilltop::disconnect(lawa) + # Housekeeping # - Saving the lawadata table save(lawadata,file=paste("//file/herman/r/oa/08/02/2017/Water Quality/ROutput/lawadata",StartYear,"-",EndYear,".RData",sep="")) @@ -364,6 +414,11 @@ save(l,file="//file/herman/r/oa/08/02/2017/Water Quality/ROutput/lawa_sitetable. load(file=paste("//file/herman/r/oa/08/02/2017/Water Quality/ROutput/lawadata",StartYear,"-",EndYear,".RData",sep="")) #Sites that are missing LAWAIDs chkbb<- unique(lawadata$SiteName[is.na(lawadata$LAWAID)]) +# -- WORTH CHECKING _WHY_ SITES ARE MISSING LAWA IDs +# Check: +# 1. There may be differences in siteIds and Council siteids when data is joined, resulting +# in unintended omissions. + cat("Number of unique sites",length(unique(lawadata$SiteName))) cat("Number of unique LAWAIDs",length(unique(lawadata$LAWAID))) @@ -429,7 +484,7 @@ for(i in 1:3){ # Need to put a test into the StateScore function to return an empty dataframe # RE-ENABLE THIS ONCE BOPRC data available - #ss413 <- StateScore(sa,scope[i],"Upland","Urban",wqparam,comparison=4) + ss413 <- StateScore(sa,scope[i],"Upland","Urban",wqparam,comparison=4) ss421 <- StateScore(sa,scope[i],"Lowland","Rural",wqparam,comparison=4) ss422 <- StateScore(sa,scope[i],"Lowland","Forest",wqparam,comparison=4) diff --git a/lawa_state_functions.R b/lawa_state_functions.R index 97a8389..9aeeb23 100644 --- a/lawa_state_functions.R +++ b/lawa_state_functions.R @@ -34,12 +34,12 @@ # */ -pkgs <- c('XML', 'RCurl','ggplot2','gridExtra','plyr','reshape2','RODBC','doBy','NADA','gdata','survival') +pkgs <- c('XML', 'RCurl','ggplot2','gridExtra','plyr','reshape2','RODBC','doBy','NADA','gdata','survival','lubridate','tidyr','dplyr','EnvStats') if(!all(pkgs %in% installed.packages()[, 'Package'])) install.packages(pkgs, dep = T) require(XML) # XML library -require(RCurl) +require(RCurl) # managing, parsing URL calls and responses require(reshape2) # melt, cast, ... require(ggplot2) # pretty plots ... require(gridExtra) @@ -49,6 +49,10 @@ require(doBy) require(NADA) require(gdata) require(survival) +require(lubridate) +require(tidyr) +require(dplyr) +require(EnvStats) # Library from EPA with seasonal kendall #=================================================================================================== #/* -===Pseudo-Function prototypes===- @@ -714,7 +718,18 @@ calcScore <- function(df1,df2,wqparam){ return(df1) } -calcTrendScore <- function(df1){ + +calcTrendScore <- function(df1,trendMethod="ci"){ + if(trendMethod=="sig"){ + x<-calcTrendScore.sig(df1) + } else if(trendMethod=="ci"){ + x<-calcTrendScore.ci(df1) + } + return(x) +} + + +calcTrendScore.sig <- function(df1){ #names(seasonalkendall) <- c("LAWAID","Parameter","Sen.Pct","Sen.Slope","p.value") #trendscores <- calcTrendScore(seasonalkendall) @@ -750,7 +765,41 @@ calcTrendScore <- function(df1){ return(df1) } -calcTrendScoreAggregate <- function(df1){ + +calcTrendScore.ci <- function(df1){ + #names(seasonalkendall) <- c("LAWAID","Parameter","Sen.Pct","Sen.Slope","p.value") + #trendscores <- calcTrendScore(seasonalkendall) + + df1 <- na.omit(df1) + # Step 1 + # Start by setting trend score to 0 + df1$TrendScore <- 0 + + # Step 2 + # Assessment of existance of slope based on Confidence limits + df1$zeroLocationCL90 <- FALSE ## Set initial state for zero outside 90pct CI bounds + df1$zeroLocationCL90[df1$Sen.Slope.LCL90<0&seasonalkendall$Sen.Slope.UCL90>0] <- TRUE + df1$TrendScore[!df1$zeroLocationCL90] <- 1 + + # Step 3 + # Applying direction of slope to TrendScore + df1$TrendScore[df1$Parameter=="BDISC"&df1$Sen.Slope<0] <- df1$TrendScore[df1$Parameter=="BDISC"&df1$Sen.Slope<0]*-1 + df1$TrendScore[df1$Parameter!="BDISC"&df1$Sen.Slope>0] <- df1$TrendScore[df1$Parameter!="BDISC"&df1$Sen.Slope>0]*-1 + return(df1) + +} + + +calcTrendScoreAggregate <- function(df1,trendMethod="ci"){ + if(trendMethod=="sig"){ + x<-calcTrendScoreAggregate.sig(df1) + } else if(trendMethod=="ci"){ + x<-calcTrendScoreAggregate.ci(df1) + } + return(x) +} + +calcTrendScoreAggregate.sig <- function(df1){ #names(seasonalkendall) <- c("LAWAID","Parameter","Sen.Pct","Sen.Slope","p.value") #trendscores <- calcTrendScore(seasonalkendall) @@ -784,6 +833,15 @@ calcTrendScoreAggregate <- function(df1){ return(df1) } +calcTrendScoreAggregate.ci <- function(df1){ + df1$TrendScoreAgg <- 0 + df1$TrendScoreAgg[df1$Direction>0] <- 1 + df1$TrendScoreAgg[df1$Direction<0] <- -1 + + return(df1) +} + + PlotSites1 <- function(df,label){ pdf("test.pdf", width=21, height=27.8) @@ -938,6 +996,42 @@ seaKenLAWA <- function (x,stat) { } + +seaKenEPA <- function(x,stat="median"){ +# this function depends on the EnvStats package, and was +# developed against EnvStats v2.3.0 + + test.data <- x + Unadj.Conc <- test.data$Value + Month<-test.data$mon + Year <- as.numeric(test.data$year) + Time <-test.data$yearMon + n<-length(Unadj.Conc) + Ten.Yr.Mean <- mean(Unadj.Conc,na.rm=TRUE) + Ten.Yr.Medn <- median(Unadj.Conc,na.rm=TRUE) + + test.output <- kendallSeasonalTrendTest(Unadj.Conc ~ Month + Year, data=test.data,conf.level=0.9) + sk.sen.slope <- as.numeric(test.output$estimate[2]) # slope - estimated parameter + sk.sen.z <- as.numeric(test.output$statistic[2]) # z statistic for trend slope + sk.p.value <- as.numeric(test.output$p.value[2]) # p value for slope + sk.sen.z.prob <- pnorm(sk.sen.z) + sk.sen.slope.pct <- 100 * sk.sen.slope/abs(Ten.Yr.Medn) + sk.slope.LCL90 <- as.numeric(test.output$interval$limits[1]) # Lower 90% confidence interval value + sk.slope.UCL90 <- as.numeric(test.output$interval$limits[2]) # Upper 90% confidence interval value + + list(sen.slope = sk.sen.slope, + sen.slope.pct = sk.sen.slope.pct, + p.value = sk.p.value, + sen.z = sk.sen.z, + sen.z.prob = sk.sen.z.prob, + slope.lcl90 = sk.slope.LCL90, + slope.ucl90 = sk.slope.UCL90) + +} + + + + prepareData <- function(value){ # 1. Asterixes @@ -968,6 +1062,9 @@ na_asterixes_val <- function(value){ return(value) } + +## Function: Identify censored values +## Assumption: all values with < or > symbols are non-zero flagCensoredDataDF <- function(df){ df$Censored<-FALSE df$CenType<-"FALSE" @@ -996,6 +1093,10 @@ flagCensoredDataDF <- function(df){ # Remove < symbol df$Value[n==TRUE]<-gsub(pattern = "^>", replacement = "", x = df$Value[n==TRUE]) + ## Added 30-08-2017 to deal with <0 values + df$Censored[df$CenType=='Left' & df$Value==0]<-FALSE + df$CenType[df$CenType=='Left' & df$Value==0] <- "FALSE" + return(df) } @@ -1173,22 +1274,22 @@ TrendCriteria <- function (first_20pct, last__20pct, num_samples, rate, years, m } samplesByInterval <- function (StartYear, EndYear, StartMonth, EndMonth, lawadata) { - # Creating YYYY-MM values for period of trend analysis - for(i in StartYear:EndYear){ - for(j in StartMonth:EndMonth){ - if(i==StartYear && j==StartMonth){ - #Using formatC to pad months 1-9 with leading zero - arr<-paste(i,"-",formatC(j, width = 2, format = "d", flag = "0") ,sep="") - } else { - arr<-c(arr,paste(i,"-",formatC(j, width = 2, format = "d", flag = "0") ,sep="")) - } - } - } - - - #Setting up data frames with one value per month, bimonth and quarter - aa<- as.data.frame(arr) - names(aa) <- c("yearMon") + # # Creating YYYY-MM values for period of trend analysis + # for(i in StartYear:EndYear){ + # for(j in StartMonth:EndMonth){ + # if(i==StartYear && j==StartMonth){ + # #Using formatC to pad months 1-9 with leading zero + # arr<-paste(i,"-",formatC(j, width = 2, format = "d", flag = "0") ,sep="") + # } else { + # arr<-c(arr,paste(i,"-",formatC(j, width = 2, format = "d", flag = "0") ,sep="")) + # } + # } + # } + # + # + # #Setting up data frames with one value per month, bimonth and quarter + # aa<- as.data.frame(arr) + # names(aa) <- c("yearMon") lawadata$depth <- 0 # indicating surface samples @@ -1196,15 +1297,35 @@ samplesByInterval <- function (StartYear, EndYear, StartMonth, EndMonth, lawadat lawadata$mon <- as.character(lawadata$Date,format="%m") lawadata$yearMon <- paste(lawadata$year,"-",lawadata$mon,"-01",sep="") + # West Coast adjustment given they collect seasons based on actual seasons, not annual quarters + wcrc <- lawadata[lawadata$Agency=="WCRC",] + wcrc <- wcrc[complete.cases(wcrc$SiteName),] + wcrc$yearMon<- as.character(ymd(wcrc$yearMon) %m+% months(1)) + sum(is.na(wcrc$yearMon)) + + lawadata <- lawadata[!lawadata$Agency=="WCRC",] + lawadata <- rbind.data.frame(lawadata,wcrc) + lawadata <- lawadata[!is.na(lawadata$Date),] + rm(wcrc) + + # Resetting year and mon variables based on update to WCRC + #lawadata$year <- as.character(lawadata$yearMon,format="%Y") + #lawadata$mon <- as.character(lawadata$yearMon,format="%m") + + lawadata$year <- substr(lawadata$yearMon,start = 1,stop=4) + lawadata$mon <- substr(lawadata$yearMon,start = 6,stop=7) + lawadata$bimon <- floor((as.numeric(lawadata$mon)-1)/2)*2+1 # returns 1,2,3,4,5,6 depending on bimonth of year month falls in lawadata$yearBimon <- paste(lawadata$year,"-",formatC(lawadata$bimon, width = 2, format = "d", flag = "0"),"-01",sep="") - lawadata$Qtr <- floor((as.numeric(lawadata$mon)-1)/3)*3+1 # returns 1,2,3,4 depending on quarter of year month falls in lawadata$yearQtr <- paste(lawadata$year,"-",formatC(lawadata$Qtr, width = 2, format = "d", flag = "0"),"-01",sep="") + return(lawadata) } + + #Ton Snelder #LWP Ltd #March 2016 @@ -1238,7 +1359,7 @@ leftCensored <- function(x, forwardT="log", reverseT="exp") { # x<-tmp # forwardT="log" # reverseT="exp" - # ------------------------------------------------------------------------------------------------ + #------------------------------------------------------------------------------------------------ # Sean Hodges 2017-07-09 # This routine differs from the originally supplied censoring algorithm # The following code block adds in the variables described above. This should be worked into the @@ -1247,7 +1368,7 @@ leftCensored <- function(x, forwardT="log", reverseT="exp") { # x$dflag :a flag indicating whether the raw data are left censored ("dl"), right censored ("al"), # or within the lower and upper detection limits ("ok") x$converted_values <- x$Value - + # x$dflag <- "ok" x$dflag[x$Censored==TRUE & x$CenType=="Left"] <- "dl" @@ -1255,6 +1376,7 @@ leftCensored <- function(x, forwardT="log", reverseT="exp") { # ------------------------------------------------------------------------------------------------ myData <- x + #names(myData) <- c("SiteName","Date","converted_values","Method","parameter","Censored","CenType","dflag" ) myData$dflag <- as.character(myData$dflag) #cat("Site = ", as.character(x$SiteName)[1], " & Variable = ", as.character(x$parameter)[1], "\n") diff --git a/lawa_trend_only_x_yrs.R b/lawa_trend_only_x_yrs.R new file mode 100644 index 0000000..0d5ade3 Binary files /dev/null and b/lawa_trend_only_x_yrs.R differ diff --git a/lawa_trend_x_yrs.R b/lawa_trend_x_yrs.R index 1fc9590..7c5cb75 100644 --- a/lawa_trend_x_yrs.R +++ b/lawa_trend_x_yrs.R @@ -9,8 +9,801 @@ # Horizons Regional Council #=================================================================================================== +rm(list = ls()) +TRENDPERIOD <- 5 # years + + # Clearing workspace + + +ANALYSIS<-"TREND" +# Set working directory +od <- getwd() +wd <- "//file/herman/R/OA/08/02/2017/Water Quality/R/lawa_state" +setwd(wd) + +# Clean up output folder before starting script. +# cleanup <- FALSE +# if(cleanup){ +# rOutput <- "//file/herman/r/oa/08/02/2017/Water Quality/ROutput" +# files <- list.files(rOutput) +# if(length(files) >0){ +# for(i in 1:length(files)){ +# file.remove(paste(rOutput,"/",files[i],sep="")) +# } +# } +# } + +x <- Sys.time() + + +#Reference Dates +EndYear <- 2016 + + +StartYear <- EndYear - TRENDPERIOD + 1 + +#if(!exists(foo, mode="function")) source("lawa_state_functions.R") + +#/* -===Include required function libraries===- */ + +source("scripts/WQualityStateTrend/lawa_state_functions.R") +HilltopLibrary<-FALSE +library(Hilltop) +HilltopLibrary<-TRUE +#-- Specifying source files - note that these connections are pulling from SOE folder on HilltopDEV + +if(HilltopLibrary==TRUE){ + lawa <- Hilltop::HilltopData("//hilltopdev/data/lawa2017/state/lawa_provisional_2017.dsn") +} + +#/* -===Global variable/constant definitions===- */ +vendor <- c("52NORTH","AQUATIC","HILLTOP","KISTERS") + +seasons <- read.csv("seasons.csv") + +#/* -===Local variable/constant definitions===- */ +wqparam <- c("BDISC","TURB","NH4","TON","TN","DRP","TP","ECOLI") + +tss <- 3 # tss = time series server +# tss_url <- "http://hilltopdev.horizons.govt.nz/lawa2014trend05.hts?" +# tss_url <- "http://hilltopdev.horizons.govt.nz:8080/lawa2017trend05.lawa?" +tss_url <- "http://hilltopdev.horizons.govt.nz:8080/lawa2017.lawa?" + +hts <- c("service=Hilltop", + "&request=SiteList", + "&request=MeasurementList", + "&request=GetData&collection=LAWA_", + paste("&from=",StartYear,"-01-01&to=",EndYear+1,"-01-01",sep="") +) +#_52N +#_kqs +#_sos <- c("service=SOS&request=GetObservation&featureOfInterest=","&observedProperty=","&temporalFilter=om:phenomenom,") + + +#/* -===Subroutine===- +#// void main(){} +#*/ + +# Site data request + +#l <- SiteTable(databasePathFileName="//ares/waterquality/LAWA/2013/hilltop.mdb",sqlID=2) ## Assumes all sites have hilltop.mdb site names +if(HilltopLibrary!=TRUE) requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") + +# Replace database call here with call to previously loaded WFS Site data +#l <- SiteTable(databasePathFileName="//file/herman/R/OA/08/02/2017/MASTER SiteList/lawa_2017.mdb",sqlID=3) ## Allows for different sitenames in hilltop.mdb - requires assessment and population of the database. +l <- read.csv("LAWA_Site_Table1.csv",stringsAsFactors=FALSE) + +l$SWQLanduse[l$SWQLanduse=="Native"|l$SWQLanduse=="Exotic"|l$SWQLanduse=="Natural"] <- "Forest" + +## fixing gaps in Christchurch city council data +l$SiteID[l$SiteID==""] <- l$CouncilSiteID[l$SiteID==""] + +if(HilltopLibrary==TRUE){ + s <- Hilltop::SiteList(lawa) +} else { + r <- requestData(vendor[tss],tss_url,request=paste(hts[1],hts[2],sep="")) + s <- SiteList(r) +} + + +# Load Reference data for Trends --- NO LONGER REQUIRED WITH FUNCTIONS FROM TON +# --- SNELDER TO IMPUTE CENSORED VALUES +#trendRules_csv <- read.csv(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/RScript/lawa_state/trendrules.csv",sep=""),header=TRUE,sep=",",quote = "\"") + +cat("LAWA Water QUality TREND Analysis\n","Number of sites returned:",length(s)) + + +# -=== WQ PARAMETERS ===- +#requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") +for(i in 1:length(wqparam)){ + ## Added Hilltop library 2017-09-07 + if(HilltopLibrary==FALSE){ + requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") + } + cat("Starting",wqparam[i],"\n") + + if(HilltopLibrary==TRUE){ + lawa_collection <- GetCollection.HilltopData(lawa, "//hilltopdev/c$/HilltopServer/LAWA_collections.xml",paste("LAWA",wqparam[i],sep="_")) + mySites<-unique(lawa_collection[,1]) + myMeas<-unique(lawa_collection[,2]) + for(ii in 1:length(mySites)){ + dx <- try(GetData(lawa,mySites[ii], myMeas, startTime=paste(StartYear,"-01-01",sep=""), endTime=paste(EndYear + 1,"-01-01",sep=""), WQParams=FALSE),silent=TRUE) + if(attr(dx,"class")!="try-error"){ + if(ii==1){ + x1 <- unlist(attr(dx,"dimnames")) + x1df <- data.frame(index(dx),as.character(coredata(dx)),stringsAsFactors = FALSE) + x1df$SiteName<-mySites[ii];x1df$parameter<-wqparam[i];x1df$Method<-"" + x1df <- x1df[,c(3,1,2,5,4)] + names(x1df) <- c("SiteName" , "Date" , "Value" , "Method" , "parameter") + wqdata <- x1df + } else { + x1 <- unlist(attr(dx,"dimnames")) + x1df <- data.frame(index(dx),as.character(coredata(dx)),stringsAsFactors = FALSE) + x1df$SiteName<-mySites[ii];x1df$parameter<-wqparam[i];x1df$Method<-"" + x1df <- x1df[,c(3,1,2,5,4)] + names(x1df) <- c("SiteName" , "Date" , "Value" , "Method" , "parameter") + wqdata <- rbind.data.frame(wqdata,x1df,stringsAsFactors = FALSE) + } + rm(x1,x1df) + } + } + + } else { + r <- readUrl(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) + #r <- requestData(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) + wqdata <- MeasurementList(xmlmdata=r,requestType="Hilltop") + wqdata$Value <- as.character(wqdata$Value) + wqdata$parameter <- wqparam[i] + } + + # ------------------------ + # Handling censored data + # ------------------------ + + #1. Detect censored data + wqdata_cen <-flagCensoredDataDF(wqdata) + wqdata_cen$Value <- as.numeric(wqdata_cen$Value) + + # Reduce dataset to complete cases only - removes NA's etc + ok <- complete.cases(wqdata_cen[,3]) + wqdata_cen <- wqdata_cen[ok,] + + # 2. Handle Left Censored (<) + # For STATE, half value where CenType==Left + cat("Left Censored\n") + + if(ANALYSIS=="STATE"){ + wqdata_left <- qualifiedValues2(wqdata_cen) + } else { + # For TREND, apply leftCensored() + + if(exists("wqdata_left")){ + rm(wqdata_left) + } + for(x in 1:length(s)){ + # cat(x,s[x],"\n",sep=" ") + tmp<-wqdata_cen[wqdata_cen$SiteName==s[x],] + ok <- complete.cases(tmp[,3]) + tmp <- tmp[ok,] + + # Only process sites that have data + if(length(tmp[,1])!=0){ + if(!exists("wqdata_left")){ + + tmp1<-leftCensored(tmp) + if(tmp1!=FALSE){ + wqdata_left <- tmp1 + rm(tmp1) + } + + } else { + tmp_left<-leftCensored(tmp) + if(tmp_left!=FALSE){ + wqdata_left <- rbind.data.frame(wqdata_left,tmp_left) + } + } + #cat("Found",length(tmp[,1]),"values for",wqparam[i],"at",s[x],"\n") + + } else { + #cat("No",wqparam[i],"at",s[x],"\n") + } + } + } + # 3. Handle Right censored (>) + cat("Right Censored\n") + if(exists("wqdata_right")){ + rm(wqdata_right) + } + for(x in 1:length(s)){ + tmp<-wqdata_left[wqdata_left$SiteName==s[x],] + ok <- complete.cases(tmp[,3]) + tmp <- tmp[ok,] + # Only process sites that have data + if(length(tmp[,1])!=0){ + if(!exists("wqdata_right")){ + + wqdata_right<-rightCensored(tmp) + + } else { + tmp_right<-rightCensored(tmp) + wqdata_right <- rbind.data.frame(wqdata_right,tmp_right) + } + #cat("Found",length(tmp[,1]),"values for",wqparam[i],"at",s[x],"\n") + + } else { + #cat("No",wqparam[i],"at",s[x],"\n") + } + } + + + # 4. Jitter tied data + cat("Jitter\n") + if(exists("wqdata_jitter")){ + rm(wqdata_jitter) + } + for(x in 1:length(s)){ + tmp<-wqdata_right[wqdata_right$SiteName==s[x],] + ok <- complete.cases(tmp[,3]) + tmp <- tmp[ok,] + # Only process sites that have data + if(length(tmp[,1])!=0){ + #cat("Jitter",s[x],"\n") + if(!exists("wqdata_jitter")){ + + wqdata_jitter<-addJitter(tmp) + + } else { + tmp_jitter<-addJitter(tmp) + wqdata_jitter <- rbind.data.frame(wqdata_jitter,tmp_jitter) + } + + } else { + #cat("No",wqparam[i],"at",s[x],"\n") + } + } + + + #wqdata <- merge(wqdata, l, by.x="SiteName",by.y="Site", all.x=TRUE) # using native sitetable sitenames to match + wqdata_jitter$OriginalValue <- wqdata_jitter$Value + wqdata_jitter$Value <- wqdata_jitter$i3Values + + wqdata <- merge(wqdata_right, l, by.x="SiteName",by.y="CouncilSiteID", all.x=TRUE) # Using Hilltop sitenames to match site information + + #wqdata$parameter <- wqparam[i] + + wqdata_q <- wqdata # retaining original data retrieved from webservice + + #wqdata <- merge(wqdata, l, by.x="SiteName",by.y="Site", all.x=TRUE) # using native sitetable sitenames to match + # wqdata <- merge(wqdata, l, by.x="SiteName",by.y="Site", all.x=TRUE) # Using Hilltop sitenames to match site information + # wqdata$parameter <- wqparam[i] + + # Reduce dataset to complete cases only - removes sites that have data in the hilltop + # files, but are not explicitly included in the LAWA site table. + ok <- complete.cases(wqdata[,3]) + wqdata <- wqdata[ok,] + + # Fields have been renamed from WFS feed to match what the code is + # expecting, as code originally written expecting data from Hilltop Site Table. + newFieldNames <- c("SiteName","Date","Value","Method","parameter", "Censored", + "CenType","converted_values","dflag","i1Values","i2Values","X", + "LAWAID","ID", "UsedInLAWA","AltitudeGroup","LanduseGroup","FrequencyAll", + "Frequency","Region","Agency","ISLAND","CatchID","CatchType", + "NZREACH","Catchment","Comments","LawaCatchm","CatchLbl") + + names(wqdata) <- newFieldNames + + + + # Deprecated 18-Sep-2016 as censored data handled by functions + # supplied by Ton Snelder. + #wqdata <- merge(wqdata, tr, by.x="LAWAID",by.y="LAWAID", all.y=TRUE) + + ## --- WHAT IS BEING REORDER HERE AND WHY? --- + # Reorder items in data.frame + #wqdata <- wqdata[order(wqdata[,1],wqdata[,3]),] + + # Building dataframe to save at the end of this step + if(i==1){ + lawadata <- wqdata + lawadata_q <- wqdata_q + } else { + lawadata <- rbind(lawadata,wqdata) + lawadata_q <- rbind(lawadata_q,wqdata_q) + } + + + print(Sys.time() - x) + + +} + +# Housekeeping +# - Saving the lawadata table +save(lawadata,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trenddata",StartYear,"-",EndYear,".RData",sep="")) +#write.csv(lawadata,"//file/herman/R/OA/08/02/2017/Water Quality/ROutput/LAWA_RAW_DATA_TREND05yr.csv") + +save(lawadata_q,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trenddata_q_",StartYear,"-",EndYear,".RData",sep="")) + +# disconnect from Hilltop object +Hilltop::disconnect(lawa) + + +# DATA CLEANSE # + +#Calculating the Long term LAWA Trends in water quality. + +#Reference Dates +#StartYear <- Set at start of script +#EndYear <- Set at start of script +years <- EndYear - StartYear + 1 +StartMonth <- 1 +EndMonth <- 12 +if(years==5){ + rate<-1 +} else{ + rate<-0.9 +} +load(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trenddata",StartYear,"-",EndYear,".RData",sep="")) + +################################################################################### +#Step 0 Data Summary +################################################################################### +#b" Ensure one value per sampling interval +#b" Calculate the number of samples per sampling interval and select/calculate/ +# determine representative values + +lawadata <- samplesByInterval(StartYear,EndYear,StartMonth,EndMonth,lawadata) + +#drop sites with no LAWA ID +lawadata <- lawadata[!is.na(lawadata$LAWAID),] + +df_count <- summaryBy(Value~SiteName+parameter+yearMon,data=lawadata, FUN=c(length), keep.name=TRUE) + + +multipleResultsByMonth <- subset(df_count,Value>1) + +# ---- DOCUMENTATION ---- +# Values are now derived for the three sampling frequencies. +# Medians are derived, rather than nearest values. The original +# justification was the number of samples that Councils would +# deliver in their data for a month - SOE samples were not +# necessarily separated out from other project data. A decision +# was made early on to simply take a median. That decision has +# not been reviewed, but should have been, in the light of +# of the progressive improvements in data supply. + +## Default median calculation +df_value_monthly <- subset(summaryBy(Value~SiteName+parameter+yearMon,data=lawadata, + id=~LAWAID+LanduseGroup+AltitudeGroup+Catchment+Region+Frequency+year+mon+bimon+Qtr+depth, + FUN=c(quantile), prob=c(0.5), type=5, keep.name=TRUE),Frequency=="Monthly") +# +# df_value_bimonthly <- subset(summaryBy(Value~SiteName+parameter+yearBimon,data=lawadata, +# id=~LAWAID+LanduseGroup+AltitudeGroup+Catchment+Region+Frequency+year+mon+bimon+Qtr+depth, +# FUN=c(quantile), prob=c(0.5), type=5, keep.name=TRUE),Frequency=="Monthly" | Frequency=="Bimonthly") + +#df_value_quarterly <- subset(summaryBy(Value~SiteName+parameter+yearQtr,data=lawadata, +# id=~LAWAID+LanduseGroup+AltitudeGroup+Catchment+Region+Frequency+year+mon+bimon+Qtr+depth, +# FUN=c(quantile), prob=c(0.5), type=5, keep.name=TRUE),Frequency=="Monthly" | Frequency=="Bimonthly" | Frequency=="Quarterly") + + +## A slightly less fraught method is to use Aggregate() to return first value in each group +df_value_quarterly <- aggregate(lawadata, list(lawadata$SiteName, + lawadata$parameter, + lawadata$yearQtr), FUN=head, 1) +df_value_quarterly <- df_value_quarterly[,c(4,8,40,6,16,20,19,29,23,22,34,35,37,39,33)] # matching columns to what's generated by summaryBy above. + + +################################################################################### + + +#require(wq) + +#====================================================== +#SEASONAL KENDALL ANALYSIS +#====================================================== + +t <- Sys.time() + + +###################################################################################################### +# Seasonal Kendall for Monthly Sampling for each site, by each parameter + +# Returning unique LAWAIDs for processing data subsets +#lawa_ids <- as.character(unique(df_value_monthly$LAWAID)) +uLAWAID <- unique(df_value_monthly$LAWAID) +months<-12 # Monthly data +monthLbl <- "Monthly" +k <- 1 # counter for sites/parameters meeting minimum N +rbindTrendCheck<-FALSE # Flagging condition for rbinding dataframe that keeps track of sites, measurements and checks for trend inclusion +for(i in 1:length(uLAWAID)){ + # Store current LAWAID + l <- uLAWAID[i] + # Store vector of unique parameters for current LAWAID + uWQParam <- unique(df_value_monthly$parameter[df_value_monthly$LAWAID==l]) + + #l <- lawa_ids[i] + #df_value_monthly1 <- subset(df_value_monthly, LAWAID==l) + #parameters <- as.character(unique(df_value_monthly1$parameter)) + # this step is to double check output with TimeTrends + + ## The following two lines of code have now been rendered unnecessary by changing + ## the library used for the trend analysis from "wq" to "EnvStats". + ## The original "wq" library (now no longer available on CRAN) required a + ## timeseries object to be passed to the seasonal kendall functdion; the + ## EnvStats library only requires a dataframe. + + #lawa <- wqData(data=df_value_monthly1,locus=c(3,5,15),c(2,4),site.order=TRUE,time.format="%Y-%m-%d",type="long") + #x <- tsMake(object=lawa,focus=gsub("-",".",l)) + + # calculating seasonal Kendall statistics for individual parameters for an individual site + + for(j in 1:length(uWQParam)){ + # subsetting dataframe based on lawaid and parameter + x <- df_value_monthly[df_value_monthly$LAWAID==l&df_value_monthly$parameter==uWQParam[j],] + + ### TREND INCLUSION CRITERIA + # 1. Count samples in the first or last 12 month periods to compare to entry criteria for trend + first_year<-length(x$year[x$year==StartYear]) + last__year<-length(x$year[x$year==EndYear]) + + # 2. Count samples in order to compare to entry criteria for trend + num_samples <- length(x[,1]) + + # building dataframe to report out data to assess pass-fails for trend inclusion + v <- matrix(data=c(l,uWQParam[j],first_year,last__year,num_samples),nrow=1,ncol=5,byrow=TRUE) + if(!rbindTrendCheck){ + validDataForTrend <-as.data.frame(v,stringsAsFactors=FALSE) + rbindTrendCheck<-TRUE + } else { + validDataForTrend <- rbind(validDataForTrend, as.data.frame(v,stringsAsFactors=FALSE)) + } + + # Check Trend Criteria - Assess pass-fail for trends analysis + PassTrendCriteria <- TrendCriteria(first_year, last__year, num_samples, rate, years, months, monthMultiplier = 1) + + # Processing Trends for sites/parameters pass trend criteria + if(PassTrendCriteria){ + + s <- seaKenEPA(x) + list(sen.slope = sk.sen.slope, + sen.slope.pct = sk.sen.slope.pct, + p.value = sk.p.value, + sen.z = sk.sen.z, + sen.z.prob = sk.sen.z.prob, + slope.lcl90 = sk.slope.LCL90, + slope.ucl90 = sk.slope.UCL90) + + m <-data.frame(l, + uWQParam[j], + s$sen.slope.pct, + s$sen.slope, + s$p.value, + sen.z, + sen.z.prob, + s$slope.lcl90, + s$slope.ucl90, + stringsAsFactors = FALSE) + if(k==1){ + seasonalkendall <- m + #cat("seasonalkendal dataframe created\n") + } else { + seasonalkendall <- rbind(seasonalkendall, m) + #cat("Appending to seasonalkendall dataframe\n") + } + + rm(s,m) + k <- k + 1 + + } + } + +} + +names(seasonalkendall) <- c("LAWAID", + "Parameter", + "Sen.Pct", + "Sen.Slope", + "p.value", + "sen.z", + "sen.z.prob", + "Sen.Slope.LCL90", + "Sen.Slope.UCL90") + +seasonalkendall$freq<- monthLbl +trendscores <- calcTrendScore(seasonalkendall) + +names(validDataForTrend) <- c("LAWAID","Parameter","N.Months.StartYear","N.Months.EndYear","Num.Samples") +validDataForTrend$freq<- monthLbl + + +rm(m) + +load(file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/lawa_sitetable.RData") + + +# Fields have been renamed from WFS feed to match what the code is +# expecting, as code originally written expecting data from Hilltop Site Table. +newFieldNames <- c("X","LAWAID","ID", "CouncilSiteID", "UsedInLAWA","AltitudeGroup","LanduseGroup","FrequencyAll","Frequency", + "Region","Agency","ISLAND","CatchID","CatchType", + "NZREACH","Catchment","Comments","LawaCatchm") + +names(l) <- newFieldNames + +trends <- merge(trendscores, l, by.x="LAWAID",by.y="LAWAID",all.x=TRUE) # Using LAWAIDs to join tables +rm(seasonalkendall) +write.csv(trends,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_monthly_",StartYear,"-",EndYear,".csv",sep="")) +write.csv(validDataForTrend,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_monthly_",StartYear,"-",EndYear,".csv",sep="")) + +cat("LAWA Water QUality Trend Analysis\nCompleted assigning Trend Results for Monthly Data\n") +cat(paste(k,"Sites/Parameter combinations meet 90 percent sampling occasion requirements\n")) +rm(trends,validDataForTrend) + +# ###################################################################################################### +# # Seasonal Kendall for Bimonthly Sampling for each site, by each parameter +# +# # Returning unique LAWAIDs for processing data subsets +# lawa_ids <- as.character(unique(df_value_bimonthly$LAWAID)) +# +# k <- 1 # counter for sites/parameters meeting minimum N +# rbindTrendCheck<-FALSE # Flagging condition for rbinding dataframe that keeps track of sites, measurements and checks for trend inclusion +# for(i in 1:length(lawa_ids)){ +# +# months<-6 +# l <- lawa_ids[i] +# df_value_bimonthly1 <- subset(df_value_bimonthly, LAWAID==l) +# parameters <- as.character(unique(df_value_bimonthly1$parameter)) +# # this step is to double check output with TimeTrends +# #Uncomment if needed +# #write.csv(df_value_monthly1,file=paste("c:/data/MWR_2013/2013/ES-00022.csv",sep="")) +# +# lawa <- wqData(data=df_value_bimonthly1,locus=c(3,5,15),c(2,4),site.order=TRUE,time.format="%Y-%m-%d",type="long") +# x <- tsMake(object=lawa,focus=gsub("-",".",l)) +# #cat("length(parameters)",length(parameters),"\n") +# #cat(parameters,"\n") +# +# # calculating seasonal Kendall statistics for individual parameters for an individual site +# for(j in 1:length(parameters)){ +# ### TREND INCLUSION CRITERIA +# # 1. Count samples in the first or last 20 percent of the 5 year window to compare to entry criteria for trend +# first_20pct<-length(df_value_bimonthly1$year[df_value_bimonthly1$year==StartYear & df_value_bimonthly1$parameter==parameters[j]]) +# last__20pct<-length(df_value_bimonthly1$year[df_value_bimonthly1$year==EndYear & df_value_bimonthly1$parameter==parameters[j]]) +# # 2. Count samples in order to compare to entry criteria for trend +# num_samples <- length(subset(df_value_bimonthly1,parameter==parameters[j])[,1]) +# +# # building dataframe to report out data to assess pass-fails for trend inclusion +# v <- matrix(data=c(l,parameters[j],first_20pct,last__20pct,num_samples),nrow=1,ncol=5,byrow=TRUE) +# if(!rbindTrendCheck){ +# validDataForTrend <-as.data.frame(v,stringsAsFactors=FALSE) +# rbindTrendCheck<-TRUE +# } else { +# validDataForTrend <- rbind(validDataForTrend, as.data.frame(v,stringsAsFactors=FALSE)) +# } +# +# # Check Trend Criteria - Assess pass-fail for trends analysis +# PassTrendCriteria <- TrendCriteria(first_20pct, last__20pct, num_samples, rate, years, months,monthMultiplier = 1) # This month multiplier is linked to the +# # the number of months selected here +# # Processing Trends for sites/parameters pass trend criteria +# if(PassTrendCriteria){ +# +# if(length(parameters)==1){ +# s<-seaKenLAWA(x,"median") # x has a different structure where there is only one item +# } else{ +# s<-seaKenLAWA(x[,j],"median") +# } +# #cat(i,lawa_ids[i],length(lawa$time),parameters[j],s$p.value,s$sen.slope.pct,"\n") +# +# #s$sen.slope.pct # <---- required for LAWA +# #s$sen.slope # <---- +# #s$p.value # <---- required for LAWA +# +# +# m <-matrix(data=c(l,parameters[j],s$sen.slope.pct,s$sen.slope,s$p.value),nrow=1,ncol=5,byrow=TRUE) +# if(k==1){ # removed i==i condition - causing errors where first site doesn't meet criteria for trend analysis +# seasonalkendall <-as.data.frame(m,stringsAsFactors=FALSE) +# #cat("seasonalkendal dataframe created\n") +# } else { +# seasonalkendall <- rbind(seasonalkendall, as.data.frame(m,stringsAsFactors=FALSE)) +# #cat("Appending to seasonalkendall dataframe\n") +# } +# k <- k + 1 +# } +# } +# +# } +# +# names(seasonalkendall) <- c("LAWAID","Parameter","Sen.Pct","Sen.Slope","p.value") +# names(validDataForTrend) <- c("LAWAID","Parameter","N.Months.StartYear","N.Months.EndYear","Num.Samples") +# validDataForTrend$freq<- "Bimonthly" +# +# seasonalkendall$Sen.Pct <-as.numeric(as.character(seasonalkendall$Sen.Pct)) +# seasonalkendall$Sen.Slope <-as.numeric(as.character(seasonalkendall$Sen.Slope)) +# seasonalkendall$p.value <-as.numeric(as.character(seasonalkendall$p.value)) +# +# seasonalkendall$freq<- "Bimonthly" +# trendscores <- calcTrendScore(seasonalkendall) +# +# rm(m) +# +# load(file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/lawa_sitetable.RData") +# +# # Fields have been renamed from WFS feed to match what the code is +# # expecting, as code originally written expecting data from Hilltop Site Table. +# newFieldNames <- c("X","LAWAID","ID", "CouncilSiteID", "UsedInLAWA","AltitudeGroup","LanduseGroup","FrequencyAll","Frequency", +# "Region","Agency","ISLAND","CatchID","CatchType", +# "NZREACH","Catchment","Comments","LawaCatchm") +# +# names(l) <- newFieldNames +# +# trends <- merge(trendscores, l, by.x="LAWAID",by.y="LAWAID",all.x=TRUE) # Using LAWAIDs to join tables +# rm(seasonalkendall) +# write.csv(trends,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) +# write.csv(validDataForTrend,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) +# +# cat("LAWA Water QUality Trend Analysis\nCompleted assigning Trend Results for Bimonthly Data\n") +# cat(paste(k,"Sites/Parameter combinations meet 90 percent sampling occasion requirements\n")) +# rm(trends,validDataForTrend) + +###################################################################################################### +# Seasonal Kendall for Quarterly Sampling for each site, by each parameter + +# Returning unique LAWAIDs for processing data subsets +#lawa_ids <- as.character(unique(df_value_monthly$LAWAID)) +uLAWAID <- unique(df_value_quarterly$LAWAID) +months<-4 # Quarterly data +monthLbl <- "Quarterly" +k <- 1 # counter for sites/parameters meeting minimum N +rbindTrendCheck<-FALSE # Flagging condition for rbinding dataframe that keeps track of sites, measurements and checks for trend inclusion +for(i in 1:length(uLAWAID)){ + # Store current LAWAID + l <- uLAWAID[i] + # Store vector of unique parameters for current LAWAID + uWQParam <- unique(df_value_quarterly$parameter[df_value_quarterly$LAWAID==l]) + + #l <- lawa_ids[i] + #df_value_monthly1 <- subset(df_value_monthly, LAWAID==l) + #parameters <- as.character(unique(df_value_monthly1$parameter)) + # this step is to double check output with TimeTrends + + ## The following two lines of code have now been rendered unnecessary by changing + ## the library used for the trend analysis from "wq" to "EnvStats". + ## The original "wq" library (now no longer available on CRAN) required a + ## timeseries object to be passed to the seasonal kendall functdion; the + ## EnvStats library only requires a dataframe. + + #lawa <- wqData(data=df_value_monthly1,locus=c(3,5,15),c(2,4),site.order=TRUE,time.format="%Y-%m-%d",type="long") + #x <- tsMake(object=lawa,focus=gsub("-",".",l)) + + # calculating seasonal Kendall statistics for individual parameters for an individual site + + for(j in 1:length(uWQParam)){ + # subsetting dataframe based on lawaid and parameter + x <- df_value_quarterly[df_value_quarterly$LAWAID==l&df_value_quarterly$parameter==uWQParam[j],] + + ### TREND INCLUSION CRITERIA + # 1. Count samples in the first or last 12 month periods to compare to entry criteria for trend + first_year<-length(x$year[x$year==StartYear]) + last__year<-length(x$year[x$year==EndYear]) + + # 2. Count samples in order to compare to entry criteria for trend + num_samples <- length(x[,1]) + + # building dataframe to report out data to assess pass-fails for trend inclusion + v <- matrix(data=c(l,uWQParam[j],first_year,last__year,num_samples),nrow=1,ncol=5,byrow=TRUE) + if(!rbindTrendCheck){ + validDataForTrend <-as.data.frame(v,stringsAsFactors=FALSE) + rbindTrendCheck<-TRUE + } else { + validDataForTrend <- rbind(validDataForTrend, as.data.frame(v,stringsAsFactors=FALSE)) + } + + # Check Trend Criteria - Assess pass-fail for trends analysis + PassTrendCriteria <- TrendCriteria(first_year, last__year, num_samples, rate, years, months, monthMultiplier = 1) + + # Processing Trends for sites/parameters pass trend criteria + if(PassTrendCriteria){ + + s <- seaKenEPA(x) + list(sen.slope = sk.sen.slope, + sen.slope.pct = sk.sen.slope.pct, + p.value = sk.p.value, + sen.z = sk.sen.z, + sen.z.prob = sk.sen.z.prob, + slope.lcl90 = sk.slope.LCL90, + slope.ucl90 = sk.slope.UCL90) + + m <-data.frame(l, + uWQParam[j], + s$sen.slope.pct, + s$sen.slope, + s$p.value, + sen.z, + sen.z.prob, + s$slope.lcl90, + s$slope.ucl90, + stringsAsFactors = FALSE) + if(k==1){ + seasonalkendall <- m + #cat("seasonalkendal dataframe created\n") + } else { + seasonalkendall <- rbind(seasonalkendall, m) + #cat("Appending to seasonalkendall dataframe\n") + } + + rm(s,m) + k <- k + 1 + + } + } + +} + +names(seasonalkendall) <- c("LAWAID", + "Parameter", + "Sen.Pct", + "Sen.Slope", + "p.value", + "sen.z", + "sen.z.prob", + "Sen.Slope.LCL90", + "Sen.Slope.UCL90") + +seasonalkendall$freq<- monthLbl +trendscores <- calcTrendScore(seasonalkendall) + +names(validDataForTrend) <- c("LAWAID","Parameter","N.Months.StartYear","N.Months.EndYear","Num.Samples") +validDataForTrend$freq<- monthLbl + + +rm(m) + +load(file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/lawa_sitetable.RData") + +# Fields have been renamed from WFS feed to match what the code is +# expecting, as code originally written expecting data from Hilltop Site Table. +#newFieldNames <- c("X","LAWAID","ID", "UsedInLAWA","AltitudeGroup","LanduseGroupAll","LanduseGroup","FrequencyAll","Frequency", +# "Region","Agency","ISLAND","CatchID","CatchType", +# "NZREACH","Catchment","Comments","LawaCatchm") + +newFieldNames <- c("X","LAWAID","ID", "CouncilSiteID", "UsedInLAWA","AltitudeGroup","LanduseGroup","FrequencyAll","Frequency", + "Region","Agency","ISLAND","CatchID","CatchType", + "NZREACH","Catchment","Comments","LawaCatchm") + +names(l) <- newFieldNames + +trends <- merge(trendscores, l, by.x="LAWAID",by.y="LAWAID",all.x=TRUE) # Using LAWAIDs to join tables +rm(seasonalkendall) +write.csv(trends,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_quarterly_",StartYear,"-",EndYear,".csv",sep="")) +write.csv(validDataForTrend,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_quarterly_",StartYear,"-",EndYear,".csv",sep="")) + +cat("LAWA Water QUality Trend Analysis\nCompleted assigning Trend Results for Quarterly Data\n") +cat(paste(k,"Sites/Parameter combinations meet 90 percent sampling occasion requirements\n")) +rm(trends,validDataForTrend) + +###################################################################################################### + + +print(Sys.time()-t) +setwd(od) + + +#=================================================================================================== +# LAWA TREND ANALYSIS +# Horizons Regional Council +# +# 17 September 2014 +# +# Maree Clark +# Sean Hodges +# Horizons Regional Council +#=================================================================================================== + rm(list = ls()) +TRENDPERIOD <- 10 # years + + +# Clearing workspace + ANALYSIS<-"TREND" # Set working directory @@ -32,11 +825,11 @@ setwd(wd) x <- Sys.time() -TRENDPERIOD <- 10 # years - #Reference Dates EndYear <- 2016 + + StartYear <- EndYear - TRENDPERIOD + 1 #if(!exists(foo, mode="function")) source("lawa_state_functions.R") @@ -44,6 +837,14 @@ StartYear <- EndYear - TRENDPERIOD + 1 #/* -===Include required function libraries===- */ source("scripts/WQualityStateTrend/lawa_state_functions.R") +HilltopLibrary<-FALSE +library(Hilltop) +HilltopLibrary<-TRUE +#-- Specifying source files - note that these connections are pulling from SOE folder on HilltopDEV + +if(HilltopLibrary==TRUE){ + lawa <- Hilltop::HilltopData("//hilltopdev/data/lawa2017/state/lawa_provisional_2017.dsn") +} #/* -===Global variable/constant definitions===- */ vendor <- c("52NORTH","AQUATIC","HILLTOP","KISTERS") @@ -52,7 +853,7 @@ seasons <- read.csv("seasons.csv") #/* -===Local variable/constant definitions===- */ wqparam <- c("BDISC","TURB","NH4","TON","TN","DRP","TP","ECOLI") -#wqparam <- c("BDISC") + tss <- 3 # tss = time series server # tss_url <- "http://hilltopdev.horizons.govt.nz/lawa2014trend05.hts?" # tss_url <- "http://hilltopdev.horizons.govt.nz:8080/lawa2017trend05.lawa?" @@ -76,7 +877,7 @@ hts <- c("service=Hilltop", # Site data request #l <- SiteTable(databasePathFileName="//ares/waterquality/LAWA/2013/hilltop.mdb",sqlID=2) ## Assumes all sites have hilltop.mdb site names -requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") +if(HilltopLibrary!=TRUE) requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") # Replace database call here with call to previously loaded WFS Site data #l <- SiteTable(databasePathFileName="//file/herman/R/OA/08/02/2017/MASTER SiteList/lawa_2017.mdb",sqlID=3) ## Allows for different sitenames in hilltop.mdb - requires assessment and population of the database. @@ -84,8 +885,17 @@ l <- read.csv("LAWA_Site_Table1.csv",stringsAsFactors=FALSE) l$SWQLanduse[l$SWQLanduse=="Native"|l$SWQLanduse=="Exotic"|l$SWQLanduse=="Natural"] <- "Forest" -r <- requestData(vendor[tss],tss_url,request=paste(hts[1],hts[2],sep="")) -s <- SiteList(r) + +## fixing gaps in Christchurch city council data +l$SiteID[l$SiteID==""] <- l$CouncilSiteID[l$SiteID==""] + + +if(HilltopLibrary==TRUE){ + s <- Hilltop::SiteList(lawa) +} else { + r <- requestData(vendor[tss],tss_url,request=paste(hts[1],hts[2],sep="")) + s <- SiteList(r) +} # Load Reference data for Trends --- NO LONGER REQUIRED WITH FUNCTIONS FROM TON @@ -98,18 +908,45 @@ cat("LAWA Water QUality TREND Analysis\n","Number of sites returned:",length(s)) # -=== WQ PARAMETERS ===- #requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") for(i in 1:length(wqparam)){ - - # Deprecated 18-Sep-2016 as censored data handled by functions - # supplied by Ton Snelder. - #tr <- subset(trendRules_csv,DefaultMeasurement==wqparam[i] & Trend=="5years" & Rule=="Halve non detect" & UsedInLAWA==TRUE) - requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") + ## Added Hilltop library 2017-09-07 + if(HilltopLibrary==FALSE){ + requestData(vendor[tss],tss_url,"service=Hilltop&request=Reset") + } cat("Starting",wqparam[i],"\n") - r <- readUrl(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) - #r <- requestData(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) - wqdata <- MeasurementList(xmlmdata=r,requestType="Hilltop") - wqdata$Value <- as.character(wqdata$Value) - wqdata$parameter <- wqparam[i] + if(HilltopLibrary==TRUE){ + lawa_collection <- GetCollection.HilltopData(lawa, "//hilltopdev/c$/HilltopServer/LAWA_collections.xml",paste("LAWA",wqparam[i],sep="_")) + mySites<-unique(lawa_collection[,1]) + myMeas<-unique(lawa_collection[,2]) + for(ii in 1:length(mySites)){ + dx <- try(GetData(lawa,mySites[ii], myMeas, startTime=paste(StartYear,"-01-01",sep=""), endTime=paste(EndYear + 1,"-01-01",sep=""), WQParams=FALSE),silent=TRUE) + if(attr(dx,"class")!="try-error"){ + if(ii==1){ + x1 <- unlist(attr(dx,"dimnames")) + x1df <- data.frame(index(dx),as.character(coredata(dx)),stringsAsFactors = FALSE) + x1df$SiteName<-mySites[ii];x1df$parameter<-wqparam[i];x1df$Method<-"" + x1df <- x1df[,c(3,1,2,5,4)] + names(x1df) <- c("SiteName" , "Date" , "Value" , "Method" , "parameter") + wqdata <- x1df + } else { + x1 <- unlist(attr(dx,"dimnames")) + x1df <- data.frame(index(dx),as.character(coredata(dx)),stringsAsFactors = FALSE) + x1df$SiteName<-mySites[ii];x1df$parameter<-wqparam[i];x1df$Method<-"" + x1df <- x1df[,c(3,1,2,5,4)] + names(x1df) <- c("SiteName" , "Date" , "Value" , "Method" , "parameter") + wqdata <- rbind.data.frame(wqdata,x1df,stringsAsFactors = FALSE) + } + rm(x1,x1df) + } + } + + } else { + r <- readUrl(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) + #r <- requestData(vendor[tss],tss_url,paste(hts[1],hts[4],wqparam[i],hts[length(hts)],sep="")) + wqdata <- MeasurementList(xmlmdata=r,requestType="Hilltop") + wqdata$Value <- as.character(wqdata$Value) + wqdata$parameter <- wqparam[i] + } # ------------------------ # Handling censored data @@ -136,11 +973,11 @@ for(i in 1:length(wqparam)){ rm(wqdata_left) } for(x in 1:length(s)){ - - cat(x,s[x],"\n",sep=" ") + # cat(x,s[x],"\n",sep=" ") tmp<-wqdata_cen[wqdata_cen$SiteName==s[x],] ok <- complete.cases(tmp[,3]) - tmp <- tmp[ok,] + tmp <- tmp[ok,] + # Only process sites that have data if(length(tmp[,1])!=0){ if(!exists("wqdata_left")){ @@ -229,9 +1066,9 @@ for(i in 1:length(wqparam)){ wqdata_q <- wqdata # retaining original data retrieved from webservice #wqdata <- merge(wqdata, l, by.x="SiteName",by.y="Site", all.x=TRUE) # using native sitetable sitenames to match -# wqdata <- merge(wqdata, l, by.x="SiteName",by.y="Site", all.x=TRUE) # Using Hilltop sitenames to match site information -# wqdata$parameter <- wqparam[i] - + # wqdata <- merge(wqdata, l, by.x="SiteName",by.y="Site", all.x=TRUE) # Using Hilltop sitenames to match site information + # wqdata$parameter <- wqparam[i] + # Reduce dataset to complete cases only - removes sites that have data in the hilltop # files, but are not explicitly included in the LAWA site table. ok <- complete.cases(wqdata[,3]) @@ -279,6 +1116,9 @@ save(lawadata,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/tr save(lawadata_q,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trenddata_q_",StartYear,"-",EndYear,".RData",sep="")) +# disconnect from Hilltop object +Hilltop::disconnect(lawa) + # DATA CLEANSE # @@ -304,33 +1144,51 @@ load(file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trenddata", #b" Calculate the number of samples per sampling interval and select/calculate/ # determine representative values - lawadata <- samplesByInterval(StartYear,EndYear,StartMonth,EndMonth,lawadata) +#drop sites with no LAWA ID +lawadata <- lawadata[!is.na(lawadata$LAWAID),] df_count <- summaryBy(Value~SiteName+parameter+yearMon,data=lawadata, FUN=c(length), keep.name=TRUE) multipleResultsByMonth <- subset(df_count,Value>1) +# ---- DOCUMENTATION ---- +# Values are now derived for the three sampling frequencies. +# Medians are derived, rather than nearest values. The original +# justification was the number of samples that Councils would +# deliver in their data for a month - SOE samples were not +# necessarily separated out from other project data. A decision +# was made early on to simply take a median. That decision has +# not been reviewed, but should have been, in the light of +# of the progressive improvements in data supply. + ## Default median calculation df_value_monthly <- subset(summaryBy(Value~SiteName+parameter+yearMon,data=lawadata, id=~LAWAID+LanduseGroup+AltitudeGroup+Catchment+Region+Frequency+year+mon+bimon+Qtr+depth, FUN=c(quantile), prob=c(0.5), type=5, keep.name=TRUE),Frequency=="Monthly") +# +# df_value_bimonthly <- subset(summaryBy(Value~SiteName+parameter+yearBimon,data=lawadata, +# id=~LAWAID+LanduseGroup+AltitudeGroup+Catchment+Region+Frequency+year+mon+bimon+Qtr+depth, +# FUN=c(quantile), prob=c(0.5), type=5, keep.name=TRUE),Frequency=="Monthly" | Frequency=="Bimonthly") + +#df_value_quarterly <- subset(summaryBy(Value~SiteName+parameter+yearQtr,data=lawadata, +# id=~LAWAID+LanduseGroup+AltitudeGroup+Catchment+Region+Frequency+year+mon+bimon+Qtr+depth, +# FUN=c(quantile), prob=c(0.5), type=5, keep.name=TRUE),Frequency=="Monthly" | Frequency=="Bimonthly" | Frequency=="Quarterly") -df_value_bimonthly <- subset(summaryBy(Value~SiteName+parameter+yearBimon,data=lawadata, - id=~LAWAID+LanduseGroup+AltitudeGroup+Catchment+Region+Frequency+year+mon+bimon+Qtr+depth, - FUN=c(quantile), prob=c(0.5), type=5, keep.name=TRUE),Frequency=="Monthly" | Frequency=="Bimonthly") -df_value_quarterly <- subset(summaryBy(Value~SiteName+parameter+yearQtr,data=lawadata, - id=~LAWAID+LanduseGroup+AltitudeGroup+Catchment+Region+Frequency+year+mon+bimon+Qtr+depth, - FUN=c(quantile), prob=c(0.5), type=5, keep.name=TRUE),Frequency=="Monthly" | Frequency=="Bimonthly" | Frequency=="Quarterly") +## A slightly less fraught method is to use Aggregate() to return first value in each group +df_value_quarterly <- aggregate(lawadata, list(lawadata$SiteName, + lawadata$parameter, + lawadata$yearQtr), FUN=head, 1) +df_value_quarterly <- df_value_quarterly[,c(4,8,40,6,16,20,19,29,23,22,34,35,37,39,33)] # matching columns to what's generated by summaryBy above. ################################################################################### -require(wq) +#require(wq) #====================================================== #SEASONAL KENDALL ANALYSIS @@ -343,34 +1201,48 @@ t <- Sys.time() # Seasonal Kendall for Monthly Sampling for each site, by each parameter # Returning unique LAWAIDs for processing data subsets -lawa_ids <- as.character(unique(df_value_monthly$LAWAID)) - -k <- 1 # counter for sites/parameters meeting minimum N +#lawa_ids <- as.character(unique(df_value_monthly$LAWAID)) +uLAWAID <- unique(df_value_monthly$LAWAID) +months<-12 # Monthly data +monthLbl <- "Monthly" +k <- 1 # counter for sites/parameters meeting minimum N rbindTrendCheck<-FALSE # Flagging condition for rbinding dataframe that keeps track of sites, measurements and checks for trend inclusion -for(i in 1:length(lawa_ids)){ +for(i in 1:length(uLAWAID)){ + # Store current LAWAID + l <- uLAWAID[i] + # Store vector of unique parameters for current LAWAID + uWQParam <- unique(df_value_monthly$parameter[df_value_monthly$LAWAID==l]) - months<-12 - l <- lawa_ids[i] - df_value_monthly1 <- subset(df_value_monthly, LAWAID==l) - parameters <- as.character(unique(df_value_monthly1$parameter)) + #l <- lawa_ids[i] + #df_value_monthly1 <- subset(df_value_monthly, LAWAID==l) + #parameters <- as.character(unique(df_value_monthly1$parameter)) # this step is to double check output with TimeTrends - lawa <- wqData(data=df_value_monthly1,locus=c(3,5,15),c(2,4),site.order=TRUE,time.format="%Y-%m-%d",type="long") - #cat(i,lawa_ids[i],"\n") - x <- tsMake(object=lawa,focus=gsub("-",".",l)) + ## The following two lines of code have now been rendered unnecessary by changing + ## the library used for the trend analysis from "wq" to "EnvStats". + ## The original "wq" library (now no longer available on CRAN) required a + ## timeseries object to be passed to the seasonal kendall functdion; the + ## EnvStats library only requires a dataframe. + + #lawa <- wqData(data=df_value_monthly1,locus=c(3,5,15),c(2,4),site.order=TRUE,time.format="%Y-%m-%d",type="long") + #x <- tsMake(object=lawa,focus=gsub("-",".",l)) # calculating seasonal Kendall statistics for individual parameters for an individual site - for(j in 1:length(parameters)){ + for(j in 1:length(uWQParam)){ + # subsetting dataframe based on lawaid and parameter + x <- df_value_monthly[df_value_monthly$LAWAID==l&df_value_monthly$parameter==uWQParam[j],] + ### TREND INCLUSION CRITERIA # 1. Count samples in the first or last 12 month periods to compare to entry criteria for trend - first_year<-length(df_value_monthly1$year[df_value_monthly1$year==StartYear & df_value_monthly1$parameter==parameters[j]]) - last__year<-length(df_value_monthly1$year[df_value_monthly1$year==EndYear & df_value_monthly1$parameter==parameters[j]]) + first_year<-length(x$year[x$year==StartYear]) + last__year<-length(x$year[x$year==EndYear]) + # 2. Count samples in order to compare to entry criteria for trend - num_samples <- length(subset(df_value_monthly1,parameter==parameters[j])[,1]) + num_samples <- length(x[,1]) # building dataframe to report out data to assess pass-fails for trend inclusion - v <- matrix(data=c(l,parameters[j],first_year,last__year,num_samples),nrow=1,ncol=5,byrow=TRUE) + v <- matrix(data=c(l,uWQParam[j],first_year,last__year,num_samples),nrow=1,ncol=5,byrow=TRUE) if(!rbindTrendCheck){ validDataForTrend <-as.data.frame(v,stringsAsFactors=FALSE) rbindTrendCheck<-TRUE @@ -379,47 +1251,69 @@ for(i in 1:length(lawa_ids)){ } # Check Trend Criteria - Assess pass-fail for trends analysis - PassTrendCriteria <- TrendCriteria(first_year, last__year, num_samples, rate, years, months,monthMultiplier = 1) - + PassTrendCriteria <- TrendCriteria(first_year, last__year, num_samples, rate, years, months, monthMultiplier = 1) + # Processing Trends for sites/parameters pass trend criteria if(PassTrendCriteria){ - if(length(parameters)==1){ - s<-seaKenLAWA(x,"median") # x has a different structure where there is only one item - } else{ - s<-seaKenLAWA(x[,j],"median") - } - #cat(i,lawa_ids[i],length(lawa$time),parameters[j],s$p.value,s$sen.slope.pct,"\n") - #s$sen.slope.pct # <---- required for LAWA - #s$sen.slope # <---- - #s$p.value # <---- required for LAWA - + s <- seaKenEPA(x) + list(sen.slope = sk.sen.slope, + sen.slope.pct = sk.sen.slope.pct, + p.value = sk.p.value, + sen.z = sk.sen.z, + sen.z.prob = sk.sen.z.prob, + slope.lcl90 = sk.slope.LCL90, + slope.ucl90 = sk.slope.UCL90) - m <-matrix(data=c(l,parameters[j],s$sen.slope.pct,s$sen.slope,s$p.value),nrow=1,ncol=5,byrow=TRUE) - if(k==1){ # removed i==i condition - causing errors where first site doesn't meet criteria for trend analysis - seasonalkendall <-as.data.frame(m,stringsAsFactors=FALSE) + m <-data.frame(l, + uWQParam[j], + s$sen.slope.pct, + s$sen.slope, + s$p.value, + sen.z, + sen.z.prob, + s$slope.lcl90, + s$slope.ucl90, + stringsAsFactors = FALSE) + if(k==1){ + seasonalkendall <- m #cat("seasonalkendal dataframe created\n") } else { - seasonalkendall <- rbind(seasonalkendall, as.data.frame(m,stringsAsFactors=FALSE)) + seasonalkendall <- rbind(seasonalkendall, m) #cat("Appending to seasonalkendall dataframe\n") } - #cat(k,"\n") + + rm(s,m) k <- k + 1 + } } } -names(seasonalkendall) <- c("LAWAID","Parameter","Sen.Pct","Sen.Slope","p.value") +names(seasonalkendall) <- c("LAWAID", + "Parameter", + "Sen.Pct", + "Sen.Slope", + "p.value", + "sen.z", + "sen.z.prob", + "Sen.Slope.LCL90", + "Sen.Slope.UCL90") + +seasonalkendall$freq<- monthLbl +trendscores <- calcTrendScore(seasonalkendall) + names(validDataForTrend) <- c("LAWAID","Parameter","N.Months.StartYear","N.Months.EndYear","Num.Samples") -validDataForTrend$freq<- "Monthly" +validDataForTrend$freq<- monthLbl -seasonalkendall$Sen.Pct <-as.numeric(as.character(seasonalkendall$Sen.Pct)) -seasonalkendall$Sen.Slope <-as.numeric(as.character(seasonalkendall$Sen.Slope)) -seasonalkendall$p.value <-as.numeric(as.character(seasonalkendall$p.value)) -seasonalkendall$freq<- "Monthly" -trendscores <- calcTrendScore(seasonalkendall) +#names(seasonalkendall) <- c("LAWAID","Parameter","Sen.Pct","Sen.Slope","p.value") + +#seasonalkendall$Sen.Pct <-as.numeric(as.character(seasonalkendall$Sen.Pct)) +#seasonalkendall$Sen.Slope <-as.numeric(as.character(seasonalkendall$Sen.Slope)) +#seasonalkendall$p.value <-as.numeric(as.character(seasonalkendall$p.value)) + rm(m) @@ -440,147 +1334,160 @@ write.csv(trends,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput write.csv(validDataForTrend,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_monthly_",StartYear,"-",EndYear,".csv",sep="")) cat("LAWA Water QUality Trend Analysis\nCompleted assigning Trend Results for Monthly Data\n") -cat(paste(k,"Sites/Parameter combinations meet 75 percent sampling occasion requirements\n")) +cat(paste(k,"Sites/Parameter combinations meet 90 percent sampling occasion requirements\n")) rm(trends,validDataForTrend) -###################################################################################################### -# Seasonal Kendall for Bimonthly Sampling for each site, by each parameter - -# Returning unique LAWAIDs for processing data subsets -lawa_ids <- as.character(unique(df_value_bimonthly$LAWAID)) - -k <- 1 # counter for sites/parameters meeting minimum N -rbindTrendCheck<-FALSE # Flagging condition for rbinding dataframe that keeps track of sites, measurements and checks for trend inclusion -for(i in 1:length(lawa_ids)){ - - months<-6 - l <- lawa_ids[i] - df_value_bimonthly1 <- subset(df_value_bimonthly, LAWAID==l) - parameters <- as.character(unique(df_value_bimonthly1$parameter)) - # this step is to double check output with TimeTrends - #Uncomment if needed - #write.csv(df_value_monthly1,file=paste("c:/data/MWR_2013/2013/ES-00022.csv",sep="")) - - lawa <- wqData(data=df_value_bimonthly1,locus=c(3,5,15),c(2,4),site.order=TRUE,time.format="%Y-%m-%d",type="long") - x <- tsMake(object=lawa,focus=gsub("-",".",l)) - #cat("length(parameters)",length(parameters),"\n") - #cat(parameters,"\n") - - # calculating seasonal Kendall statistics for individual parameters for an individual site - for(j in 1:length(parameters)){ - ### TREND INCLUSION CRITERIA - # 1. Count samples in the first or last 20 percent of the 5 year window to compare to entry criteria for trend - first_20pct<-length(df_value_bimonthly1$year[df_value_bimonthly1$year==StartYear & df_value_bimonthly1$parameter==parameters[j]]) - last__20pct<-length(df_value_bimonthly1$year[df_value_bimonthly1$year==EndYear & df_value_bimonthly1$parameter==parameters[j]]) - # 2. Count samples in order to compare to entry criteria for trend - num_samples <- length(subset(df_value_bimonthly1,parameter==parameters[j])[,1]) - - # building dataframe to report out data to assess pass-fails for trend inclusion - v <- matrix(data=c(l,parameters[j],first_20pct,last__20pct,num_samples),nrow=1,ncol=5,byrow=TRUE) - if(!rbindTrendCheck){ - validDataForTrend <-as.data.frame(v,stringsAsFactors=FALSE) - rbindTrendCheck<-TRUE - } else { - validDataForTrend <- rbind(validDataForTrend, as.data.frame(v,stringsAsFactors=FALSE)) - } - - # Check Trend Criteria - Assess pass-fail for trends analysis - PassTrendCriteria <- TrendCriteria(first_20pct, last__20pct, num_samples, rate, years, months,monthMultiplier = 1) # This month multiplier is linked to the - # the number of months selected here - # Processing Trends for sites/parameters pass trend criteria - if(PassTrendCriteria){ - - if(length(parameters)==1){ - s<-seaKenLAWA(x,"median") # x has a different structure where there is only one item - } else{ - s<-seaKenLAWA(x[,j],"median") - } - #cat(i,lawa_ids[i],length(lawa$time),parameters[j],s$p.value,s$sen.slope.pct,"\n") - - #s$sen.slope.pct # <---- required for LAWA - #s$sen.slope # <---- - #s$p.value # <---- required for LAWA - - - m <-matrix(data=c(l,parameters[j],s$sen.slope.pct,s$sen.slope,s$p.value),nrow=1,ncol=5,byrow=TRUE) - if(k==1){ # removed i==i condition - causing errors where first site doesn't meet criteria for trend analysis - seasonalkendall <-as.data.frame(m,stringsAsFactors=FALSE) - #cat("seasonalkendal dataframe created\n") - } else { - seasonalkendall <- rbind(seasonalkendall, as.data.frame(m,stringsAsFactors=FALSE)) - #cat("Appending to seasonalkendall dataframe\n") - } - k <- k + 1 - } - } - -} - -names(seasonalkendall) <- c("LAWAID","Parameter","Sen.Pct","Sen.Slope","p.value") -names(validDataForTrend) <- c("LAWAID","Parameter","N.Months.StartYear","N.Months.EndYear","Num.Samples") -validDataForTrend$freq<- "Bimonthly" - -seasonalkendall$Sen.Pct <-as.numeric(as.character(seasonalkendall$Sen.Pct)) -seasonalkendall$Sen.Slope <-as.numeric(as.character(seasonalkendall$Sen.Slope)) -seasonalkendall$p.value <-as.numeric(as.character(seasonalkendall$p.value)) - -seasonalkendall$freq<- "Bimonthly" -trendscores <- calcTrendScore(seasonalkendall) - -rm(m) - -load(file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/lawa_sitetable.RData") - -# Fields have been renamed from WFS feed to match what the code is -# expecting, as code originally written expecting data from Hilltop Site Table. -newFieldNames <- c("X","LAWAID","ID", "CouncilSiteID", "UsedInLAWA","AltitudeGroup","LanduseGroup","FrequencyAll","Frequency", - "Region","Agency","ISLAND","CatchID","CatchType", - "NZREACH","Catchment","Comments","LawaCatchm") - -names(l) <- newFieldNames - -trends <- merge(trendscores, l, by.x="LAWAID",by.y="LAWAID",all.x=TRUE) # Using LAWAIDs to join tables -rm(seasonalkendall) -write.csv(trends,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) -write.csv(validDataForTrend,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) - -cat("LAWA Water QUality Trend Analysis\nCompleted assigning Trend Results for Bimonthly Data\n") -cat(paste(k,"Sites/Parameter combinations meet 75 percent sampling occasion requirements\n")) -rm(trends,validDataForTrend) +# ###################################################################################################### +# # Seasonal Kendall for Bimonthly Sampling for each site, by each parameter +# +# # Returning unique LAWAIDs for processing data subsets +# lawa_ids <- as.character(unique(df_value_bimonthly$LAWAID)) +# +# k <- 1 # counter for sites/parameters meeting minimum N +# rbindTrendCheck<-FALSE # Flagging condition for rbinding dataframe that keeps track of sites, measurements and checks for trend inclusion +# for(i in 1:length(lawa_ids)){ +# +# months<-6 +# l <- lawa_ids[i] +# df_value_bimonthly1 <- subset(df_value_bimonthly, LAWAID==l) +# parameters <- as.character(unique(df_value_bimonthly1$parameter)) +# # this step is to double check output with TimeTrends +# #Uncomment if needed +# #write.csv(df_value_monthly1,file=paste("c:/data/MWR_2013/2013/ES-00022.csv",sep="")) +# +# lawa <- wqData(data=df_value_bimonthly1,locus=c(3,5,15),c(2,4),site.order=TRUE,time.format="%Y-%m-%d",type="long") +# x <- tsMake(object=lawa,focus=gsub("-",".",l)) +# #cat("length(parameters)",length(parameters),"\n") +# #cat(parameters,"\n") +# +# # calculating seasonal Kendall statistics for individual parameters for an individual site +# for(j in 1:length(parameters)){ +# ### TREND INCLUSION CRITERIA +# # 1. Count samples in the first or last 20 percent of the 5 year window to compare to entry criteria for trend +# first_20pct<-length(df_value_bimonthly1$year[df_value_bimonthly1$year==StartYear & df_value_bimonthly1$parameter==parameters[j]]) +# last__20pct<-length(df_value_bimonthly1$year[df_value_bimonthly1$year==EndYear & df_value_bimonthly1$parameter==parameters[j]]) +# # 2. Count samples in order to compare to entry criteria for trend +# num_samples <- length(subset(df_value_bimonthly1,parameter==parameters[j])[,1]) +# +# # building dataframe to report out data to assess pass-fails for trend inclusion +# v <- matrix(data=c(l,parameters[j],first_20pct,last__20pct,num_samples),nrow=1,ncol=5,byrow=TRUE) +# if(!rbindTrendCheck){ +# validDataForTrend <-as.data.frame(v,stringsAsFactors=FALSE) +# rbindTrendCheck<-TRUE +# } else { +# validDataForTrend <- rbind(validDataForTrend, as.data.frame(v,stringsAsFactors=FALSE)) +# } +# +# # Check Trend Criteria - Assess pass-fail for trends analysis +# PassTrendCriteria <- TrendCriteria(first_20pct, last__20pct, num_samples, rate, years, months,monthMultiplier = 1) # This month multiplier is linked to the +# # the number of months selected here +# # Processing Trends for sites/parameters pass trend criteria +# if(PassTrendCriteria){ +# +# if(length(parameters)==1){ +# s<-seaKenLAWA(x,"median") # x has a different structure where there is only one item +# } else{ +# s<-seaKenLAWA(x[,j],"median") +# } +# #cat(i,lawa_ids[i],length(lawa$time),parameters[j],s$p.value,s$sen.slope.pct,"\n") +# +# #s$sen.slope.pct # <---- required for LAWA +# #s$sen.slope # <---- +# #s$p.value # <---- required for LAWA +# +# +# m <-matrix(data=c(l,parameters[j],s$sen.slope.pct,s$sen.slope,s$p.value),nrow=1,ncol=5,byrow=TRUE) +# if(k==1){ # removed i==i condition - causing errors where first site doesn't meet criteria for trend analysis +# seasonalkendall <-as.data.frame(m,stringsAsFactors=FALSE) +# #cat("seasonalkendal dataframe created\n") +# } else { +# seasonalkendall <- rbind(seasonalkendall, as.data.frame(m,stringsAsFactors=FALSE)) +# #cat("Appending to seasonalkendall dataframe\n") +# } +# k <- k + 1 +# } +# } +# +# } +# +# names(seasonalkendall) <- c("LAWAID","Parameter","Sen.Pct","Sen.Slope","p.value") +# names(validDataForTrend) <- c("LAWAID","Parameter","N.Months.StartYear","N.Months.EndYear","Num.Samples") +# validDataForTrend$freq<- "Bimonthly" +# +# seasonalkendall$Sen.Pct <-as.numeric(as.character(seasonalkendall$Sen.Pct)) +# seasonalkendall$Sen.Slope <-as.numeric(as.character(seasonalkendall$Sen.Slope)) +# seasonalkendall$p.value <-as.numeric(as.character(seasonalkendall$p.value)) +# +# seasonalkendall$freq<- "Bimonthly" +# trendscores <- calcTrendScore(seasonalkendall) +# +# rm(m) +# +# load(file="//file/herman/R/OA/08/02/2017/Water Quality/ROutput/lawa_sitetable.RData") +# +# # Fields have been renamed from WFS feed to match what the code is +# # expecting, as code originally written expecting data from Hilltop Site Table. +# newFieldNames <- c("X","LAWAID","ID", "CouncilSiteID", "UsedInLAWA","AltitudeGroup","LanduseGroup","FrequencyAll","Frequency", +# "Region","Agency","ISLAND","CatchID","CatchType", +# "NZREACH","Catchment","Comments","LawaCatchm") +# +# names(l) <- newFieldNames +# +# trends <- merge(trendscores, l, by.x="LAWAID",by.y="LAWAID",all.x=TRUE) # Using LAWAIDs to join tables +# rm(seasonalkendall) +# write.csv(trends,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/trend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) +# write.csv(validDataForTrend,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_bimonthly_",StartYear,"-",EndYear,".csv",sep="")) +# +# cat("LAWA Water QUality Trend Analysis\nCompleted assigning Trend Results for Bimonthly Data\n") +# cat(paste(k,"Sites/Parameter combinations meet 90 percent sampling occasion requirements\n")) +# rm(trends,validDataForTrend) ###################################################################################################### # Seasonal Kendall for Quarterly Sampling for each site, by each parameter # Returning unique LAWAIDs for processing data subsets -lawa_ids <- as.character(unique(df_value_quarterly$LAWAID)) - -k <- 1 # counter for sites/parameters meeting minimum N +#lawa_ids <- as.character(unique(df_value_monthly$LAWAID)) +uLAWAID <- unique(df_value_quarterly$LAWAID) +months<-4 # Quarterly data +monthLbl <- "Quarterly" +k <- 1 # counter for sites/parameters meeting minimum N rbindTrendCheck<-FALSE # Flagging condition for rbinding dataframe that keeps track of sites, measurements and checks for trend inclusion -for(i in 1:length(lawa_ids)){ - - l <- lawa_ids[i] - df_value_quarterly1 <- subset(df_value_quarterly, LAWAID==l) - parameters <- as.character(unique(df_value_quarterly1$parameter)) +for(i in 1:length(uLAWAID)){ + # Store current LAWAID + l <- uLAWAID[i] + # Store vector of unique parameters for current LAWAID + uWQParam <- unique(df_value_quarterly$parameter[df_value_quarterly$LAWAID==l]) + + #l <- lawa_ids[i] + #df_value_monthly1 <- subset(df_value_monthly, LAWAID==l) + #parameters <- as.character(unique(df_value_monthly1$parameter)) # this step is to double check output with TimeTrends - #Uncomment if needed - #write.csv(df_value_monthly1,file=paste("c:/data/MWR_2013/2013/ES-00022.csv",sep="")) - lawa <- wqData(data=df_value_quarterly1,locus=c(3,5,15),c(2,4),site.order=TRUE,time.format="%Y-%m-%d",type="long") - x <- tsMake(object=lawa,focus=gsub("-",".",l)) - #cat("length(parameters)",length(parameters),"\n") - #cat(parameters,"\n") + ## The following two lines of code have now been rendered unnecessary by changing + ## the library used for the trend analysis from "wq" to "EnvStats". + ## The original "wq" library (now no longer available on CRAN) required a + ## timeseries object to be passed to the seasonal kendall functdion; the + ## EnvStats library only requires a dataframe. + + #lawa <- wqData(data=df_value_monthly1,locus=c(3,5,15),c(2,4),site.order=TRUE,time.format="%Y-%m-%d",type="long") + #x <- tsMake(object=lawa,focus=gsub("-",".",l)) # calculating seasonal Kendall statistics for individual parameters for an individual site - for(j in 1:length(parameters)){ + + for(j in 1:length(uWQParam)){ + # subsetting dataframe based on lawaid and parameter + x <- df_value_quarterly[df_value_quarterly$LAWAID==l&df_value_quarterly$parameter==uWQParam[j],] + ### TREND INCLUSION CRITERIA # 1. Count samples in the first or last 12 month periods to compare to entry criteria for trend - first_year<-length(df_value_quarterly1$year[df_value_quarterly1$year==StartYear & df_value_quarterly1$parameter==parameters[j]]) - last__year<-length(df_value_quarterly1$year[df_value_quarterly1$year==EndYear & df_value_quarterly1$parameter==parameters[j]]) + first_year<-length(x$year[x$year==StartYear]) + last__year<-length(x$year[x$year==EndYear]) + # 2. Count samples in order to compare to entry criteria for trend - num_samples <- length(subset(df_value_quarterly1,parameter==parameters[j])[,1]) + num_samples <- length(x[,1]) # building dataframe to report out data to assess pass-fails for trend inclusion - v <- matrix(data=c(l,parameters[j],first_year,last__year,num_samples),nrow=1,ncol=5,byrow=TRUE) + v <- matrix(data=c(l,uWQParam[j],first_year,last__year,num_samples),nrow=1,ncol=5,byrow=TRUE) if(!rbindTrendCheck){ validDataForTrend <-as.data.frame(v,stringsAsFactors=FALSE) rbindTrendCheck<-TRUE @@ -589,46 +1496,69 @@ for(i in 1:length(lawa_ids)){ } # Check Trend Criteria - Assess pass-fail for trends analysis - PassTrendCriteria <- TrendCriteria(first_year, last__year, num_samples, rate, years, months) + PassTrendCriteria <- TrendCriteria(first_year, last__year, num_samples, rate, years, months, monthMultiplier = 1) # Processing Trends for sites/parameters pass trend criteria if(PassTrendCriteria){ - if(length(parameters)==1){ - s<-seaKenLAWA(x,"median") # x has a different structure where there is only one item - } else{ - s<-seaKenLAWA(x[,j],"median") - } - #cat(i,lawa_ids[i],length(lawa$time),parameters[j],s$p.value,s$sen.slope.pct,"\n") - #s$sen.slope.pct # <---- required for LAWA - #s$sen.slope # <---- - #s$p.value # <---- required for LAWA - + s <- seaKenEPA(x) + list(sen.slope = sk.sen.slope, + sen.slope.pct = sk.sen.slope.pct, + p.value = sk.p.value, + sen.z = sk.sen.z, + sen.z.prob = sk.sen.z.prob, + slope.lcl90 = sk.slope.LCL90, + slope.ucl90 = sk.slope.UCL90) - m <-matrix(data=c(l,parameters[j],s$sen.slope.pct,s$sen.slope,s$p.value),nrow=1,ncol=5,byrow=TRUE) - if(k==1){ # removed i==i condition - causing errors where first site doesn't meet criteria for trend analysis - seasonalkendall <-as.data.frame(m,stringsAsFactors=FALSE) + m <-data.frame(l, + uWQParam[j], + s$sen.slope.pct, + s$sen.slope, + s$p.value, + sen.z, + sen.z.prob, + s$slope.lcl90, + s$slope.ucl90, + stringsAsFactors = FALSE) + if(k==1){ + seasonalkendall <- m #cat("seasonalkendal dataframe created\n") } else { - seasonalkendall <- rbind(seasonalkendall, as.data.frame(m,stringsAsFactors=FALSE)) + seasonalkendall <- rbind(seasonalkendall, m) #cat("Appending to seasonalkendall dataframe\n") } + + rm(s,m) k <- k + 1 + } } } -names(seasonalkendall) <- c("LAWAID","Parameter","Sen.Pct","Sen.Slope","p.value") +names(seasonalkendall) <- c("LAWAID", + "Parameter", + "Sen.Pct", + "Sen.Slope", + "p.value", + "sen.z", + "sen.z.prob", + "Sen.Slope.LCL90", + "Sen.Slope.UCL90") + +seasonalkendall$freq<- monthLbl +trendscores <- calcTrendScore(seasonalkendall) + names(validDataForTrend) <- c("LAWAID","Parameter","N.Months.StartYear","N.Months.EndYear","Num.Samples") -validDataForTrend$freq<- "Quarterly" +validDataForTrend$freq<- monthLbl -seasonalkendall$Sen.Pct <-as.numeric(as.character(seasonalkendall$Sen.Pct)) -seasonalkendall$Sen.Slope <-as.numeric(as.character(seasonalkendall$Sen.Slope)) -seasonalkendall$p.value <-as.numeric(as.character(seasonalkendall$p.value)) -seasonalkendall$freq<- "Quarterly" -trendscores <- calcTrendScore(seasonalkendall) +#names(seasonalkendall) <- c("LAWAID","Parameter","Sen.Pct","Sen.Slope","p.value") + +#seasonalkendall$Sen.Pct <-as.numeric(as.character(seasonalkendall$Sen.Pct)) +#seasonalkendall$Sen.Slope <-as.numeric(as.character(seasonalkendall$Sen.Slope)) +#seasonalkendall$p.value <-as.numeric(as.character(seasonalkendall$p.value)) + rm(m) @@ -652,11 +1582,12 @@ write.csv(trends,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput write.csv(validDataForTrend,file=paste("//file/herman/R/OA/08/02/2017/Water Quality/ROutput/validDataForTrend_quarterly_",StartYear,"-",EndYear,".csv",sep="")) cat("LAWA Water QUality Trend Analysis\nCompleted assigning Trend Results for Quarterly Data\n") -cat(paste(k,"Sites/Parameter combinations meet 75 percent sampling occasion requirements\n")) +cat(paste(k,"Sites/Parameter combinations meet 90 percent sampling occasion requirements\n")) rm(trends,validDataForTrend) ###################################################################################################### print(Sys.time()-t) -setwd(od) \ No newline at end of file +setwd(od) + diff --git a/main.R b/main.R index 5b4f6a1..fe737ed 100644 --- a/main.R +++ b/main.R @@ -23,16 +23,15 @@ source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualit ## STATE And TREND Analysis source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/lawa_state_5yr.R") -#source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/lawa_trend_5yr.R") -#source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/lawa_trend_10yr.R") +source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/lawa_trend_x_yrs.R") ## NOF Analysis -#source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/NOF_SWQ.R") +source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/NOF_SWQ.R") ## Preparing data for Export source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/FinaliseTrend.R") source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/lawa_10yrsDataPull.R") -#source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/lawa_dataForGraphingOnLaWA.R") +source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/lawa_dataForGraphingOnLaWA.R") source("//file/herman/r/oa/08/02/2017/Water Quality/R/lawa_state/scripts/WQualityStateTrend/PackageDataForDelivery.R")