diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fef73a2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 K. Arthur Endsley + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..be759f1 --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +MODIS MOD17 Terrestrial Productivity Algorithm +============================================== + +The MODIS MOD17 algorithm provided the first global, continuous, weekly estimates of ecosystem gross primary productivity (GPP) and annual estimates of net primary productivity (NPP). This source code can be used for comprehensive calibration, validation, sensitivity, and uncertainty analysis of the MOD17 algorithm. It was used by Endsley et al. (In Review) for the final recalibration of MODIS MOD17 and the development of a new, VIIRS-based VNP17 global productivity algorithm. + + +Installation +------------ + +Within the `MOD17` repository's root directory: + +```sh +pip install . +``` + +Tests can be run with: + +```sh +python tests/tests.py +``` + + +Citation +-------- + +**If using this software, please cite the following paper:** + +``` +Endsley, K.A., M. Zhao, J.S. Kimball, S. Devadiga. In Review. Continuity of global MODIS terrestrial primary productivity estimates in the VIIRS era using model-data fusion. Journal of Geophysical Research: Biogeosciences. 
+``` diff --git a/REQUIREMENTS b/REQUIREMENTS new file mode 100644 index 0000000..09eb527 --- /dev/null +++ b/REQUIREMENTS @@ -0,0 +1,8 @@ +h5py==3.4.0 +netCDF4>=1.5.7 +numpy<=1.21.5 +scipy>=1.7.0 +xarray>=0.19.0 +suntransit>=0.1.0 +tqdm>=4.60.0 +matplotlib>=3.0.0 diff --git a/data/MOD17_BPLUT_C5.1_MERRA_NASA.txt b/data/MOD17_BPLUT_C5.1_MERRA_NASA.txt new file mode 100644 index 0000000..8b9c58a --- /dev/null +++ b/data/MOD17_BPLUT_C5.1_MERRA_NASA.txt @@ -0,0 +1,21 @@ +UMD_VEG_LC ENF=0 EBF=1 DNF=2 DBF=3 MF=4 CShrub=5 OShrub=6 WSavannas=7 Savannas=8 Grass=9 Crop=10 +LUEmax(KgC/m^2/d/MJ) 0.001211 0.001405 0.001227 0.001526 0.001226 0.001495 0.001027 0.001498 0.001454 0.001215 0.001300 +Tmin_min(C) -8.00 -8.00 -8.00 -6.00 -7.00 -8.00 -8.00 -8.00 -8.00 -8.00 -8.00 +Tmin_max(C) 8.31 9.09 10.44 9.94 9.50 8.61 8.80 11.39 11.39 12.02 12.02 +VPD_min(Pa) 650.0 1000.0 650.0 650.0 650.0 650.0 650.0 650.0 650.0 650.0 650.0 +VPD_max(Pa) 3000.0 4000.0 3500.0 2900.0 2900.0 4300.0 4400.0 3500.0 3600.0 4200.0 4500.0 +SLA(LAI/KgC) 15.0 26.9 16.9 24.7 22.6 9.4 12.0 28.8 28.9 38.0 38.0 +Q10(unitless) 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 +froot_leaf_ratio 1.2 1.1 1.7 1.1 1.1 1.0 1.3 1.8 1.8 2.6 2.0 +livewood_leaf_ratio 0.182 0.162 0.165 0.203 0.203 0.079 0.040 0.091 0.051 0.000 0.000 +leaf_mr_base(Kgc/KgC/d) 0.00604 0.00604 0.00815 0.00778 0.00778 0.00869 0.00519 0.00869 0.00869 0.0098 0.0098 +froot_mr_base(Kgc/KgC/d) 0.00519 0.00519 0.00519 0.00519 0.00519 0.00519 0.00519 0.00519 0.00519 0.00819 0.00819 +livewood_mr_base(Kgc/KgC/d) 0.00397 0.00397 0.00397 0.00371 0.00371 0.00436 0.00218 0.00312 0.00100 0.00000 0.00000 + +*** Recalibrated by Maosheng Zhao before March 31st, 2009. Version is the improved Collection5.1 *** +*** Note not add extra spaces between variable names and parentheses, otherwise it will be messed up when used by MOD17 program *** +Below is the mean GPP and NPP from 2000 to 2006 estimated from 0.25 degree tuned BPLUT system of the C5.1, which will be used to +tune new version of the global MOD17. The tuning tools are MOD17_improved_main_code_GEO.c and mean_GPP_NPP_tune_BPLUT.c +UMD_VEG_LC ENF=0 EBF=1 DNF=2 DBF=3 MF=4 CShrub=5 OShrub=6 WSavannas=7 Savannas=8 Grass=9 Crop=10 +GPP(gC/m^2/yr) 877.10 2702.62 727.21 1339.93 1133.22 812.04 308.83 1368.90 1208.90 393.25 884.11 +NPP(gC/m^2/yr) 478.49 1175.45 343.55 622.79 604.82 381.50 160.14 673.06 594.73 205.86 486.46 diff --git a/mod17/__init__.py b/mod17/__init__.py new file mode 100644 index 0000000..3abe2b9 --- /dev/null +++ b/mod17/__init__.py @@ -0,0 +1,503 @@ +''' +The MOD17 Daily GPP and Annual GPP algorithm. + +Note that there are two hidden methods of the MOD17 class: + +- `MOD17._gpp()` +- `MOD17._npp()` + +These are streamlined implementations of `MOD17.daily_gpp()` and +`MOD17.annual_npp()`, respectively, and were designed for use in calibration, +where repeated function calls can make function overhead a real issue. These +streamlined functions expect a vectorized parameters array as the first +argument and subsequent arguments are driver datasets, e.g.: + + MOD17._gpp(params, fpar, tmin, vpd, par) + MOD17._npp(params, fpar, tmin, vpd, par, lai, tmean, years) +''' + +import warnings +import numpy as np +from typing import Callable, Sequence, Tuple, Union, Iterable +from numbers import Number + +PFT_VALID = (1,2,3,4,5,6,7,8,9,10,12) + +class MOD17(object): + ''' + The MODIS MxD17 Gross Primary Productivity and Net Photosynthesis model. 
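+
+    A minimal usage sketch (the driver values below are arbitrary
+    illustrations; the parameter values are the Collection 5.1 BPLUT values
+    for EBF, from `data/MOD17_BPLUT_C5.1_MERRA_NASA.txt`):
+
+        >>> params = dict(zip(MOD17.required_parameters, [
+        ...     0.001405, -8.0, 9.09, 1000.0, 4000.0, 26.9, 2.0, 2.0,
+        ...     1.1, 0.162, 0.00604, 0.00519, 0.00397]))
+        >>> model = MOD17(params)
+        >>> gpp = model.daily_gpp(fpar = 0.8, tmin = 12.0, vpd = 1500.0, par = 10.0)
+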
+    The required model parameters are:
+
+    - `LUE_max`: Maximum light-use efficiency (kg C MJ-1)
+    - `tmin0` and `tmin1`: The lower and upper bounds on the temperature
+      response of photosynthesis (deg C); i.e., temperatures at which stomata
+      are fully closed and fully open, respectively
+    - `vpd0` and `vpd1`: The lower and upper bounds on the VPD response of
+      photosynthesis (Pa); i.e., VPD at which stomata are fully open and
+      fully closed, respectively
+    - `SLA`: Specific leaf area, or projected leaf area per kilogram of C
+      [LAI kg C-1]
+    - `Q10_froot` and `Q10_livewood`: Exponent shape parameters controlling
+      respiration as a function of temperature (in degrees C) for fine roots
+      and live wood, respectively (unitless)
+    - `froot_leaf_ratio`: The ratio of fine root C to leaf C (unitless)
+    - `livewood_leaf_ratio`: Ratio of live wood carbon to annual maximum leaf
+      carbon
+    - `leaf_mr_base`: Maintenance respiration per unit leaf carbon per day at
+      a reference temperature of 20 degrees C [kg C (kg C)-1 day-1]
+    - `froot_mr_base`: Maintenance respiration per unit fine root carbon per
+      day at a reference temperature of 20 degrees C [kg C (kg C)-1 day-1]
+    - `livewood_mr_base`: Maintenance respiration per unit live wood carbon
+      per day at a reference temperature of 20 degrees C [kg C (kg C)-1 d-1]
+
+    NOTE: This class includes private class methods `MOD17._gpp()` and
+    `MOD17._npp()` that avoid the overhead associated with creating a model
+    instance; they should be used, e.g., for model calibration, because they
+    are faster and produce the same results as `MOD17.daily_gpp()` and
+    `MOD17.annual_npp()`.
+
+    NOTE: For multiple PFTs, a vectorized parameters array can be passed;
+    i.e., a dictionary where each value is an (N,) array for N sites.
+
+    Parameters
+    ----------
+    params : dict
+        Dictionary of model parameters
+    '''
+    required_parameters = [
+        'LUE_max', 'tmin0', 'tmin1', 'vpd0', 'vpd1', 'SLA',
+        'Q10_livewood', 'Q10_froot', 'froot_leaf_ratio',
+        'livewood_leaf_ratio', 'leaf_mr_base', 'froot_mr_base',
+        'livewood_mr_base'
+    ]
+
+    def __init__(self, params: dict):
+        self.params = params
+        for key in self.required_parameters:
+            setattr(self, key, params[key])
+
+    @staticmethod
+    def _gpp(params, fpar, tmin, vpd, par):
+        'Daily GPP as static method, avoids overhead of class instantiation'
+        # "params" argument should be a Sequence of atomic parameter values
+        #   in the order prescribed by "required_parameters"
+        tmin_scalar = linear_constraint(params[1], params[2])(tmin)
+        vpd_scalar = linear_constraint(
+            params[3], params[4], form = 'reversed')(vpd)
+        lue = params[0] * tmin_scalar * vpd_scalar
+        return 1e3 * lue * fpar * par
+
+    @staticmethod
+    def _npp(params, fpar, tmin, vpd, par, lai, tmean, years):
+        '''
+        Annual NPP as static method, avoids overhead of class instantiation.
+        NOTE: It's assumed that the elements of `years` are in chronological
+        order on the first axis (time axis).
+        '''
+        # "params" argument should be a Sequence of atomic parameter values
+        #   in the order prescribed by "required_parameters"
+        gpp = MOD17._gpp(params, fpar, tmin, vpd, par)
+        # Daily respiration
+        leaf_mass = lai / params[5] # LAI divided by SLA -> leaf mass [kg m-2]
+        froot_mass = leaf_mass * params[8] # Leaf mass times `froot_leaf_ratio`
+        # NOTE: Q10 calculated differently depending on the component
+        _exp = (tmean - 20) / 10
+        q10_leaf = np.power(3.22 - 0.046 * tmean, _exp)
+        q10_froot = np.power(params[7], _exp)
+        # Convert leaf, froot mass from [kg C m-2] to [g C m-2], then...
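+        #   ...compute maintenance respiration; params[10] and params[11] are
+        #   `leaf_mr_base` and `froot_mr_base`, respectively (see the
+        #   "required_parameters" list for the ordering); the leaf Q10 uses
+        #   the temperature-acclimated form of Tjoelker et al. (2001), as in
+        #   `MOD17.daily_respiration()`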
+        r_leaf = 1e3 * leaf_mass * params[10] * q10_leaf
+        r_froot = 1e3 * froot_mass * params[11] * q10_froot
+        # Accumulate respiration over each year
+        all_years = np.unique(years).tolist()
+        # Pre-allocate arrays
+        mr_leaf = np.full((len(all_years), *lai.shape[1:],), np.nan)
+        mr_froot = np.full((len(all_years), *lai.shape[1:],), np.nan)
+        mr_livewood = np.full((len(all_years), *lai.shape[1:],), np.nan)
+        diff = np.full((len(all_years), *lai.shape[1:],), np.nan)
+        for i, each_year in enumerate(all_years):
+            # Sum respiration for each tissue in each year
+            mr_leaf[i] = np.nansum(
+                np.where(years == each_year, r_leaf, 0), axis = 0)
+            mr_froot[i] = np.nansum(
+                np.where(years == each_year, r_froot, 0), axis = 0)
+            livewood_mass = (np.nanmax(
+                    np.where(years == each_year, lai, np.nan), axis = 0
+                ) / params[5]
+            ) * params[9]
+            # For consistency with other respiration components, livewood
+            #   respiration should be zero, not NaN, when no respiration
+            mrl = 1e3 * livewood_mass * params[12] *\
+                np.power(params[6], (tmean - 20) / 10).sum(axis = 0)
+            mr_livewood[i] = np.where(np.isnan(mrl), 0, mrl)
+            # Total plant maintenance respiration
+            r_m = mr_leaf[i] + mr_froot[i] + mr_livewood[i]
+            # GPP - R_M
+            diff[i] = np.where(years == each_year, gpp, 0).sum(axis = 0) - r_m
+        # Annual growth respiration is assumed to be 20% of (GPP - R_M); see
+        #   Figure 1 of MOD17 User Guide
+        return np.where(diff < 0, 0, 0.8 * diff)
+
+    @staticmethod
+    def par(sw_rad: Number, period_hrs: Number = 1) -> Number:
+        '''
+        Calculates daily total photosynthetically active radiation (PAR) from
+        (hourly) incoming short-wave radiation (SW_rad). PAR is assumed to
+        be 45% of SW_rad.
+
+        Parameters
+        ----------
+        sw_rad : int or float or numpy.ndarray
+            Incoming short-wave radiation (W m-2)
+        period_hrs : int
+            Period over which radiation is measured, in hours (Default: 1)
+
+        Returns
+        -------
+        int or float or numpy.ndarray
+        '''
+        # Convert SW_rad from [W m-2] to [MJ m-2], then take 45%;
+        #   3600 secs hr-1 times (1 MJ / 1e6 Joules) == 0.0036
+        return 0.45 * (0.0036 * (24 / period_hrs) * sw_rad)
+
+    @staticmethod
+    def vpd(qv10m: Number, pressure: Number, tmean: Number) -> Number:
+        '''
+        Computes vapor pressure deficit (VPD) from surface meteorology.
+
+        Parameters
+        ----------
+        qv10m : int or float or numpy.ndarray
+            Water vapor mixing ratio at 10-meter height (kg kg-1)
+        pressure : int or float or numpy.ndarray
+            Atmospheric pressure (Pa)
+        tmean : int or float or numpy.ndarray
+            Mean daytime temperature (degrees C)
+
+        Returns
+        -------
+        int or float or numpy.ndarray
+        '''
+        # Actual vapor pressure (Gates 1980, Biophysical Ecology, p.311)
+        avp = (qv10m * pressure) / (0.622 + (0.379 * qv10m))
+        # Saturation vapor pressure (similar to FAO formula)
+        svp = 610.7 * np.exp((17.38 * tmean) / (239 + tmean))
+        return svp - avp
+
+    def annual_npp(
+            self, fpar: Sequence, tmin: Sequence, vpd: Sequence,
+            par: Sequence, lai: Sequence, tmean: Sequence, years: Sequence
+        ) -> np.ndarray:
+        '''
+        Annual net primary productivity (NPP).
+
+        Parameters
+        ----------
+        fpar : Sequence
+            Fraction of PAR intercepted by the canopy [0, 1], a (T x ...)
+            array for T number of days
+        tmin : Sequence
+            Daily minimum temperature (degrees C), a (T x ...) array for T
+            number of days
+        vpd : Sequence
+            Daytime vapor pressure deficit (Pa), a (T x ...) array for T
+            number of days
+        par : Sequence
+            Daily photosynthetically active radiation (MJ m-2 day-1)
+        lai : Sequence
+            Leaf area index, daily, a (T x ...) array for T number of days
+        tmean : Sequence
+            Mean daily temperature (degrees C), a (T x ...) array for T number
+            of days
+        years : Sequence
+            Sequence of integers indicating the year of each daily
+            measurement, in order (e.g., [2015, 2015, ..., 2017]); a (T x ...)
+            array for T number of days
+
+        Returns
+        -------
+        numpy.ndarray
+            Total annual NPP [g C m-2 year-1]
+        '''
+        r_leaf, r_froot, r_livewood = self.annual_respiration(
+            lai, tmean, years)
+        r_m = r_leaf + r_froot + r_livewood
+        gpp = self.daily_gpp(fpar, tmin, vpd, par)
+        diff = np.empty(r_m.shape)
+        all_years = np.unique(years).tolist()
+        for i, each_year in enumerate(all_years):
+            # GPP - R_M
+            diff[i] = np.where(years == each_year, gpp, 0).sum(axis = 0) - r_m[i]
+        return np.where(diff < 0, 0, 0.8 * diff)
+
+    def annual_respiration(
+            self, lai: Sequence, tmean: Sequence, years: Sequence
+        ) -> Iterable[Tuple[Sequence, Sequence, Sequence]]:
+        '''
+        Annual total maintenance respiration. Input datasets should be daily
+        time series extending over one or more years.
+
+        Parameters
+        ----------
+        lai : Sequence
+            Leaf area index, daily, a (T x ...) array for T number of days
+        tmean : Sequence
+            Mean daily temperature (degrees C), a (T x ...) array for T number
+            of days
+        years : Sequence
+            Sequence of integers indicating the year of each daily
+            measurement, in order (e.g., [2015, 2015, ..., 2017]); a (T x ...)
+            array for T number of days
+
+        Returns
+        -------
+        tuple
+            A 3-tuple of total annual (leaf, fine root, livewood) respiration
+            with units of [g C m-2 year-1]
+        '''
+        assert lai.shape == years.shape,\
+            'LAI array should conform with "years" array'
+        assert tmean.shape == years.shape,\
+            'Mean temperature array should conform with "years" array'
+        r_leaf_daily, r_froot_daily = self.daily_respiration(lai, tmean)
+        r_leaf, r_froot, r_livewood = [], [], []
+        all_years = np.unique(years).tolist()
+        all_years.sort()
+        for i, each_year in enumerate(all_years):
+            r_leaf.append(
+                np.nansum(np.where(years == each_year, r_leaf_daily, 0),
+                axis = 0))
+            r_froot.append(
+                np.nansum(np.where(years == each_year, r_froot_daily, 0),
+                axis = 0))
+            # Annual maximum leaf mass (kg C) converted to livewood mass
+            #   by allometric relation (livewood_leaf_ratio)
+            livewood_mass = (np.nanmax(
+                    np.where(years == each_year, lai, np.nan), axis = 0
+                ) / self.SLA) * self.livewood_leaf_ratio
+            # Livewood maintenance respiration (g C day-1), converted from
+            #   (kg C day-1), as the product of livewood_mass, base
+            #   respiration rate (livewood_mr_base), and annual sum of the
+            #   maint. respiration term (Q10), see Equation 1.10, User's Guide
+            # NOTE: "livewood_mr_base" is denominated in days
+            #   (kg C kg C-1 day-1) but that's okay because we took the annual
+            #   sum of the Q10 respiration, essentially multiplying by ~365
+            rl = 1e3 * livewood_mass * self.livewood_mr_base *\
+                np.power(self.Q10_livewood, (tmean - 20) / 10).sum(axis = 0)
+            # For consistency with other respiration components, livewood
+            #   respiration should be zero, not NaN, when no respiration
+            r_livewood.append(np.where(np.isnan(rl), 0, rl))
+        # NOTE: np.stack() used for all three for a consistent output shape
+        return (np.stack(r_leaf), np.stack(r_froot), np.stack(r_livewood))
+
+    def daily_gpp(
+            self, fpar: Number, tmin: Number, vpd: Number,
+            par: Number) -> Number:
+        r'''
+        Daily gross primary productivity (GPP).
+
+        $$
+        \mathrm{GPP} = \varepsilon\times f(T_{min})\times f(V)\times
+            [\mathrm{PAR}]\times [\mathrm{fPAR}]
+        $$
+
+        Where $T_{min}$ is the minimum daily temperature, $V$ is the daytime
+        vapor pressure deficit (VPD), PAR is daily photosynthetically active
+        radiation, fPAR is the fraction of PAR absorbed by the canopy, and
+        $\varepsilon$ is the intrinsic (or maximum) light-use efficiency.
+
+        Parameters
+        ----------
+        fpar : int or float or numpy.ndarray
+            Fraction of PAR intercepted by the canopy [0, 1]
+        tmin : int or float or numpy.ndarray
+            Daily minimum temperature (degrees C)
+        vpd : int or float or numpy.ndarray
+            Daytime vapor pressure deficit (Pa)
+        par : int or float or numpy.ndarray
+            Daily photosynthetically active radiation (MJ m-2 day-1)
+
+        Returns
+        -------
+        int or float or numpy.ndarray
+            Daily GPP flux in [g C m-2 day-1]
+        '''
+        return 1e3 * self.lue(tmin, vpd) * fpar * par
+
+    def daily_respiration(
+            self, lai: Number, tmean: Number
+        ) -> Iterable[Tuple[Number, Number]]:
+        r'''
+        Daily maintenance respiration for leaves and fine roots.
+
+        Maintenance respiration ($r_m$) for leaves or fine roots is given by:
+
+        $$
+        r_m = m \times r_0 \times q^{\frac{T - 20}{10}}
+        $$
+
+        Where $m$ is either the leaf mass or fine root mass; $r_0$ is the rate
+        of maintenance respiration per unit leaf carbon (per day, at 20
+        degrees C); and $q$ is the Q10 factor.
+
+        NOTE: For fine roots and live wood, Q10 is a constant value of 2.0.
+        For leaves, the temperature-acclimated Q10 equation of Tjoelker et al.
+        (2001, Global Change Biology) is used:
+
+        $$
+        Q_{10} = 3.22 - 0.046 \times T_{avg}
+        $$
+
+        The "net photosynthesis" quantity in MOD17, even though it is a bit
+        misleading (it does not account for growth respiration and livewood
+        $r_m$), can then be calculated as GPP less the maintenance respiration
+        of leaves and fine roots:
+
+        $$
+        P_{net} = [\mathrm{GPP}] - r_{leaf} - r_{root}
+        $$
+
+        Parameters
+        ----------
+        lai : float or numpy.ndarray
+            Leaf area index, daily
+        tmean : float or numpy.ndarray
+            Mean daily temperature (degrees C)
+
+        Returns
+        -------
+        tuple
+            2-element tuple of (leaf respiration, fine root respiration) in
+            units of [g C m-2 day-1]
+        '''
+        # Leaf mass, fine root mass (Eq 1.4, 1.5 in MOD17 User Guide)
+        leaf_mass = lai / self.SLA
+        froot_mass = leaf_mass * self.froot_leaf_ratio
+        # NOTE: Q10 calculated differently depending on the component
+        _exp = (tmean - 20) / 10 # Equations 1.6 and 1.7
+        q10_leaf = np.power(3.22 - 0.046 * tmean, _exp) # Equation 1.11
+        q10_froot = np.power(self.Q10_froot, _exp)
+        # NOTE: Converting from [kg C] to [g C] via *1e3
+        r_leaf = 1e3 * leaf_mass * self.leaf_mr_base * q10_leaf
+        r_froot = 1e3 * froot_mass * self.froot_mr_base * q10_froot
+        return (r_leaf, r_froot)
+
+    def daily_net_photosynthesis(
+            self, fpar: Number, tmin: Number, vpd: Number, par: Number,
+            lai: Number, tmean: Number) -> Number:
+        '''
+        Daily net photosynthesis ("PSNet").
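+
+        Computed as (per the code below and MOD17 User Guide, Equation 1.8):
+
+        $$
+        P_{net} = [\mathrm{GPP}] - r_{leaf} - r_{froot}
+        $$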
+
+        See:
+
+        - `MOD17.daily_gpp()`
+        - `MOD17.daily_respiration()`
+
+        Parameters
+        ----------
+        fpar : int or float or numpy.ndarray
+            Fraction of PAR intercepted by the canopy [0, 1]
+        tmin : int or float or numpy.ndarray
+            Daily minimum temperature (degrees C)
+        vpd : int or float or numpy.ndarray
+            Daytime vapor pressure deficit (Pa)
+        par : int or float or numpy.ndarray
+            Daily photosynthetically active radiation (MJ m-2 day-1)
+        lai : float or numpy.ndarray
+            Leaf area index, daily
+        tmean : float or numpy.ndarray
+            Mean daily temperature (degrees C)
+        '''
+        gpp = self.daily_gpp(fpar, tmin, vpd, par)
+        r_leaf, r_froot = self.daily_respiration(lai, tmean)
+        # See MOD17 User Guide, Equation 1.8
+        return gpp - r_leaf - r_froot
+
+    def lue(self, tmin: Number, vpd: Number) -> Number:
+        '''
+        The instantaneous light-use efficiency (LUE), reduced by environmental
+        stressors (low minimum temperature, high VPD) from the maximum LUE.
+
+        Parameters
+        ----------
+        tmin : int or float or numpy.ndarray
+        vpd : int or float or numpy.ndarray
+
+        Returns
+        -------
+        float or numpy.ndarray
+        '''
+        return self.LUE_max * self.tmin_scalar(tmin) * self.vpd_scalar(vpd)
+
+    def tmin_scalar(self, x: Number) -> Number:
+        '''
+        The environmental scalar for minimum temperature (Tmin).
+
+        Parameters
+        ----------
+        x : int or float or numpy.ndarray
+            Minimum temperature (deg C)
+
+        Returns
+        -------
+        int or float or numpy.ndarray
+        '''
+        return linear_constraint(self.tmin0, self.tmin1)(x)
+
+    def vpd_scalar(self, x: Number) -> Number:
+        '''
+        The environmental scalar for vapor pressure deficit (VPD).
+
+        Parameters
+        ----------
+        x : int or float or numpy.ndarray
+            Vapor pressure deficit (Pa)
+
+        Returns
+        -------
+        int or float or numpy.ndarray
+        '''
+        return linear_constraint(self.vpd0, self.vpd1, form = 'reversed')(x)
+
+
+def linear_constraint(
+        xmin: Number, xmax: Number, form: str = None) -> Callable:
+    '''
+    Returns a linear ramp function, for deriving a value on [0, 1] from
+    an input value `x`:
+
+        if x >= xmax:
+            return 1
+        if x <= xmin:
+            return 0
+        return (x - xmin) / (xmax - xmin)
+
+    Parameters
+    ----------
+    xmin : int or float
+        Lower bound of the linear ramp function
+    xmax : int or float
+        Upper bound of the linear ramp function
+    form : str
+        Type of ramp function: "reversed" decreases as x increases;
+        "binary" returns xmax when x == 1; default (None) is increasing
+        as x increases.
+
+    Returns
+    -------
+    function
+    '''
+    assert form is None or form in ('reversed', 'binary'),\
+        'Argument "form" must be None or one of: "reversed", "binary"'
+    assert form == 'binary' or np.any(xmax >= xmin),\
+        'xmax must be greater than or equal to xmin'
+    if form == 'reversed':
+        return lambda x: np.where(x >= xmax, 0,
+            np.where(x < xmin, 1, 1 - np.divide(
+                np.subtract(x, xmin), xmax - xmin)))
+    if form == 'binary':
+        return lambda x: np.where(x == 1, xmax, xmin)
+    return lambda x: np.where(x >= xmax, 1,
+        np.where(x < xmin, 0,
+            np.divide(np.subtract(x, xmin), xmax - xmin)))
+
+
+def suppress_warnings(func):
+    'Decorator to suppress NumPy warnings'
+    def inner(*args, **kwargs):
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')
+            return func(*args, **kwargs)
+    return inner
diff --git a/mod17/calibration.py b/mod17/calibration.py
new file mode 100644
index 0000000..f609a39
--- /dev/null
+++ b/mod17/calibration.py
@@ -0,0 +1,1265 @@
+'''
+Calibration of MOD17 against a representative, global eddy covariance (EC)
+flux tower network. The model calibration is based on Markov-Chain Monte
+Carlo (MCMC).
Example use: + + python calibration.py tune-gpp --pft=1 + +The general calibration protocol used here involves: + +1. Check how well the chain(s) are mixing by running short: +`python calibration.py tune-gpp 1 --draws=2000` +2. If any chain is "sticky," run a short chain while tuning the jump scale: +`python calibration.py tune-gpp 1 --tune=scaling --draws=2000` +3. Using the trace plot from Step (2) as a reference, experiment with +different jump scales to try and achieve the same (optimal) mixing when +tuning on `lambda` (default) instead, e.g.: +`python calibration.py tune-gpp 1 --scaling=1e-2 --draws=2000` +4. When the right jump scale is found, run a chain at the desired length. + +Once a good mixture is obtained, it is necessary to prune the samples to +eliminate autocorrelation, e.g., in Python: + + sampler = MOD17StochasticSampler(...) + sampler.plot_autocorr(burn = 1000, thin = 10) + trace = sampler.get_trace(burn = 1000, thin = 10) + +A thinned posterior can be exported from the command line: + + $ python calibration.py export-bplut output.csv --burn=1000 --thin=10 + +References: + + Madani, N., Kimball, J. S., & Running, S. W. (2017). + "Improving global gross primary productivity estimates by computing + optimum light use efficiencies using flux tower data." + Journal of Geophysical Research: Biogeosciences, 122(11), 2939–2951. +''' + +import datetime +import json +import os +import warnings +import numpy as np +import h5py +import arviz as az +import pymc as pm +import aesara.tensor as at +import mod17 +from functools import partial +from multiprocessing import get_context, set_start_method +from numbers import Number +from typing import Callable, Sequence +from pathlib import Path +from matplotlib import pyplot +from scipy import signal +from mod17 import MOD17, PFT_VALID +from mod17.utils import pft_dominant, restore_bplut, write_bplut, rmsd + +MOD17_DIR = os.path.dirname(mod17.__file__) +# This matplotlib setting prevents labels from overplotting +pyplot.rcParams['figure.constrained_layout.use'] = True + + +class BlackBoxLikelihood(at.Op): + ''' + A custom Theano operator that calculates the "likelihood" of model + parameters; it takes a vector of values (the parameters that define our + model) and returns a single "scalar" value (the log-likelihood). + + Parameters + ---------- + model : Callable + An arbitrary "black box" function that takes two arguments: the + model parameters ("params") and the forcing data ("x") + observed : numpy.ndarray + The "observed" data that our log-likelihood function takes in + x : numpy.ndarray or None + The forcing data (input drivers) that our model requires, or None + if no driver data are required + weights : Sequence or None + Optional sequence of weights applied to the model residuals (as in + weighted least squares) + objective : str + Name of the objective (or "loss") function to use, one of + ('rmsd', 'gaussian', 'kge'); defaults to "rmsd" + ''' + itypes = [at.dvector] # Expects a vector of parameter values when called + otypes = [at.dscalar] # Outputs a single scalar value (the log likelihood) + + def __init__( + self, model: Callable, observed: Sequence, x: Sequence = None, + weights: Sequence = None, objective: str = 'rmsd'): + ''' + Initialise the Op with various things that our log-likelihood function + requires. The observed data ("observed") and drivers ("x") must be + stored on the instance so the Theano Op can work seamlessly. 
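+
+        A sketch of typical use, mirroring
+        `MOD17StochasticSampler.compile_gpp_model()` (here `my_model`,
+        `y_obs`, and `drivers` are hypothetical stand-ins for a real model
+        function, observed data, and driver datasets):
+
+            loglik = BlackBoxLikelihood(my_model, y_obs, x = drivers)
+            with pm.Model() as model:
+                params = at.as_tensor_variable([...])
+                pm.Potential('likelihood', loglik(params))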
+ ''' + self.model = model + self.observed = observed + self.x = x + self.weights = weights + if objective in ('rmsd', 'rmse'): + self._loglik = self.loglik + elif objective == 'gaussian': + self._loglik = self.loglik_gaussian + elif objective == 'kge': + self._loglik = self.loglik_kge + else: + raise ValueError('Unknown "objective" function specified') + + def loglik( + self, params: Sequence, observed: Sequence, + x: Sequence = None) -> Number: + ''' + Pseudo-log likelihood, based on the root-mean squared deviation + (RMSD). The sign of the RMSD is forced to be negative so as to allow + for maximization of this objective function. + + Parameters + ---------- + params : Sequence + One or more model parameters + observed : Sequence + The observed values + x : Sequence or None + Input driver data + + Returns + ------- + Number + The (negative) root-mean squared deviation (RMSD) between the + predicted and observed values + ''' + predicted = self.model(params, *x) + if self.weights is not None: + return -np.sqrt( + np.nanmean(((predicted - observed) * self.weights) ** 2)) + return -np.sqrt(np.nanmean(((predicted - observed)) ** 2)) + + def loglik_gaussian( + self, params: Sequence, observed: Sequence, + x: Sequence = None) -> Number: + ''' + Gaussian log-likelihood, assuming independent, identically distributed + observations. + + Parameters + ---------- + params : Sequence + One or more model parameters + observed : Sequence + The observed values + x : Sequence or None + Input driver data + + Returns + ------- + Number + The (negative) log-likelihood + ''' + predicted = self.model(params, *x) + sigma = params[-1] + # Gaussian log-likelihood; + # -\frac{N}{2}\,\mathrm{log}(2\pi\hat{\sigma}^2) + # - \frac{1}{2\hat{\sigma}^2} \sum (\hat{y} - y)^2 + return -0.5 * np.log(2 * np.pi * sigma**2) - (0.5 / sigma**2) *\ + np.nansum((predicted - observed)**2) + + def loglik_kge( + self, params: Sequence, observed: Sequence, + x: Sequence = None) -> Number: + r''' + Kling-Gupta efficiency. + + $$ + KGE = 1 - \sqrt{(r - 1)^2 + (\alpha - 1)^2 + (\beta - 1)^2} + $$ + + Parameters + ---------- + params : Sequence + One or more model parameters + observed : Sequence + The observed values + x : Sequence or None + Input driver data + + Returns + ------- + Number + The Kling-Gupta efficiency + ''' + predicted = self.model(params, *x) + r = np.corrcoef(predicted, observed)[0, 1] + alpha = np.std(predicted) / np.std(observed) + beta = np.sum(predicted) / np.sum(observed) + return 1 - np.sqrt((r - 1)**2 + (alpha - 1)**2 + (beta - 1)**2) + + def perform(self, node, inputs, outputs): + ''' + The method that is used when calling the Op. + + Parameters + ---------- + node + inputs : Sequence + outputs : Sequence + ''' + (params,) = inputs + logl = self._loglik(params, self.observed, self.x) + outputs[0][0] = np.array(logl) # Output the log-likelihood + + +class AbstractSampler(object): + ''' + Generic algorithm for fitting a model to data based on observed values + similar to what we can produce with our model. Not intended to be called + directly. + ''' + + def get_posterior(self, thin: int = 1) -> np.ndarray: + ''' + Returns a stacked posterior array, with optional thinning, combining + all the chains together. 
+ + Parameters + ---------- + thin : int + + Returns + ------- + numpy.ndarray + ''' + trace = az.from_netcdf(self.backend) + return np.stack([ # i.e., get every ith element, each chain + trace['posterior'][p].values[:,::thin].ravel() + for p in self.required_parameters[self.name] + ], axis = -1) + + def get_trace( + self, thin: int = None, burn: int = None + ) -> az.data.inference_data.InferenceData: + ''' + Extracts the trace from the backend data store. + + Parameters + ---------- + thin : int + Thinning rate + burn : int + The burn-in (i.e., first N samples to discard) + ''' + trace = az.from_netcdf(self.backend) + if thin is None and burn is None: + return trace + return trace.sel(draw = slice(burn, None, thin)) + + def plot_autocorr(self, thin: int = None, burn: int = None, **kwargs): + ''' + Auto-correlation plot for an MCMC sample. + + Parameters + ---------- + thin : int + Thinning rate + burn : int + The burn-in (i.e., first N samples to discard) + **kwargs + Additional keyword arguments to `arviz.plot_autocorr()`. + ''' + assert os.path.exists(self.backend),\ + 'Could not find file backend!' + trace = az.from_netcdf(self.backend) + kwargs.setdefault('combined', True) + if thin is None: + az.plot_autocorr(trace, **kwargs) + else: + burn = 0 if burn is None else burn + az.plot_autocorr( + trace.sel(draw = slice(burn, None, thin))['posterior'], + **kwargs) + pyplot.show() + + def plot_forest(self, **kwargs): + ''' + Forest plot for an MCMC sample. + + In particular: + + - `hdi_prob`: A float indicating the highest density interval (HDF) to + plot + ''' + assert os.path.exists(self.backend),\ + 'Could not find file backend!' + trace = az.from_netcdf(self.backend) + az.plot_forest(trace, **kwargs) + pyplot.show() + + def plot_pair(self, **kwargs): + ''' + Paired variables plot for an MCMC sample. + + Parameters + ---------- + **kwargs + Additional keyword arguments to `arviz.plot_pair()`. + ''' + assert os.path.exists(self.backend),\ + 'Could not find file backend!' + trace = az.from_netcdf(self.backend) + az.plot_pair(trace, **kwargs) + pyplot.show() + + def plot_partial_score( + self, observed: Sequence, drivers: Sequence, fit: dict = None): + ''' + Plots the "partial effect" of a single parameter: the score of the + model at that parameter's current value against a sweep of possible + parameter values. All other parameters are held fixed at the best-fit + values. 
+
+        Parameters
+        ----------
+        observed : Sequence
+            Sequence of observed values that will be used to calibrate the model;
+            i.e., model is scored by how close its predicted values are to the
+            observed values
+        drivers : list or tuple
+            Sequence of driver datasets to be supplied, in order, to the
+            model's run function
+        fit : dict or None
+            The best-fit parameter values used for those parameters that are
+            fixed
+        '''
+        trace = az.from_netcdf(self.backend)
+        if fit is None:
+            # Means of the posterior are the "best fit" values
+            fit = trace['posterior'].mean()
+        fit_params = list(filter(
+            lambda p: p in fit, self.required_parameters[self.name]))
+        # The NPP model depends on constants not included in the fit
+        constants = []
+        if self.name == 'NPP':
+            constants = [
+                self.params[p]
+                for p in ['LUE_max', 'tmin0', 'tmin1', 'vpd0', 'vpd1']
+            ]
+        n = len(fit_params)
+        nrow = n
+        ncol = 1
+        if n > 4:
+            nrow = 2
+            ncol = n - (n // 2)
+        fig, axes = pyplot.subplots(
+            nrow, ncol, figsize = (n * 2, n), sharey = True)
+        # Ensure `axes` is always 2D, so axes[j,k] works even when ncol == 1
+        axes = np.array(axes).reshape(nrow, ncol)
+        i = 0
+        for j in range(nrow):
+            for k in range(ncol):
+                if i >= n:
+                    break
+                free_param = fit_params[i]
+                fixed = np.stack([
+                    fit[p].values for p in fit_params
+                ])[np.newaxis,:].repeat(30, axis = 0)
+                sweep = np.linspace(
+                    trace['posterior'][free_param].min(),
+                    trace['posterior'][free_param].max(), num = 30)
+                fixed[:,i] = sweep
+                # Need to concatenate GPP parameters at beginning of fixed
+                scores = -np.array(self.score_posterior(
+                    observed, drivers, [
+                        [*constants, *f] for f in fixed.tolist()
+                    ]))
+                axes[j,k].plot(sweep, scores, 'k-')
+                axes[j,k].axvline(
+                    fit[free_param], color = 'red', linestyle = 'dashed',
+                    label = 'Posterior Mean')
+                axes[j,k].set_xlabel(free_param)
+                axes[j,k].set_title(free_param)
+                axes[j,k].legend()
+                i += 1
+        # Delete the last empty subplot
+        if n % 2 != 0:
+            fig.delaxes(axes.flatten()[-1])
+        axes[0, 0].set_ylabel('Score')
+        pyplot.tight_layout()
+        pyplot.show()
+
+    def plot_posterior(self, **kwargs):
+        '''
+        Plots the posterior distribution for an MCMC sample.
+
+        Parameters
+        ----------
+        **kwargs
+            Additional keyword arguments to `arviz.plot_posterior()`.
+        '''
+        assert os.path.exists(self.backend),\
+            'Could not find file backend!'
+        trace = az.from_netcdf(self.backend)
+        az.plot_posterior(trace, **kwargs)
+        pyplot.show()
+
+    def score_posterior(
+            self, observed: Sequence, drivers: Sequence, posterior: Sequence,
+            method: str = 'rmsd') -> Number:
+        '''
+        Returns a goodness-of-fit score based on the existing calibration.
+
+        Parameters
+        ----------
+        observed : Sequence
+            Sequence of observed values that will be used to calibrate the model;
+            i.e., model is scored by how close its predicted values are to the
+            observed values
+        drivers : list or tuple
+            Sequence of driver datasets to be supplied, in order, to the
+            model's run function
+        posterior : list or tuple
+            Sequence of posterior parameter sets (i.e., nested sequence); each
+            nested sequence will be scored
+        method : str
+            The method for generating a goodness-of-fit score
+            (Default: "rmsd")
+
+        Returns
+        -------
+        float
+        '''
+        if method != 'rmsd':
+            raise NotImplementedError('"method" must be one of: "rmsd"')
+        score_func = partial(
+            rmsd, func = self.model, observed = observed, drivers = drivers)
+        with get_context('spawn').Pool() as pool:
+            scores = pool.map(score_func, posterior)
+        return scores
+
+
+class StochasticSampler(AbstractSampler):
+    '''
+    A Markov Chain-Monte Carlo (MCMC) sampler for an arbitrary model.
The + specific sampler used is the Differential Evolution (DE) MCMC algorithm + described by Ter Braak (2008), though the implementation is specific to + the PyMC3 library. + + NOTE: The `model` (a function) should be named "_name" where "name" is + some uniquely identifiable model name. This helps `StochasticSampler.run()` + to find the correct compiler for the model. The compiler function should + be named `compiled_name_model()` (where "name" is the model name) and be + defined on a subclass of `StochasticSampler`. + + Parameters + ---------- + config : dict + Dictionary of configuration parameters + model : Callable + The function to call (with driver data and parameters); this function + should take driver data as positional arguments and the model + parameters as a `*Sequence`; it should require no external state. + observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + params_dict : dict or None + Dictionary of model parameters, to be used as initial values and as + the basis for constructing a new dictionary of optimized parameters + backend : str or None + Path to a NetCDF4 file backend (Default: None) + weights : Sequence or None + Optional sequence of weights applied to the model residuals (as in + weighted least squares) + ''' + def __init__( + self, config: dict, model: Callable, params_dict: dict = None, + backend: str = None, weights: Sequence = None, + model_name: str = None): + self.backend = backend + # Convert the BOUNDS into nested dicts for easy use + self.bounds = dict([ + (key, dict([('lower', b[0]), ('upper', b[1])])) + for key, b in config['optimization']['bounds'].items() + ]) + self.config = config + self.model = model + if hasattr(model, '__name__'): + self.name = model.__name__.strip('_').upper() # "_gpp" = "GPP" + else: + self.name = model_name + self.params = params_dict + # Set the model's prior distribution assumptions + self.prior = dict() + for key in self.required_parameters[self.name]: + # NOTE: This is the default only for LUE_max; other parameters, + # with Uniform distributions, don't use anything here + self.prior.setdefault(key, { + 'mu': params_dict[key], + 'sigma': 2e-4 + }) + self.weights = weights + assert os.path.exists(os.path.dirname(backend)) + + def run( + self, observed: Sequence, drivers: Sequence, + draws = 1000, chains = 3, tune = 'lambda', + scaling: float = 1e-3, prior: dict = dict(), + check_shape: bool = False, save_fig: bool = False, + var_names: Sequence = None) -> None: + ''' + Fits the model using DE-MCMCz approach. `tune="lambda"` (default) is + recommended; lambda is related to the scale of the jumps learned from + other chains, but epsilon ("scaling") controls the scale directly. + Using a larger value for `scaling` (Default: 1e-3) will produce larger + jumps and may directly address "sticky" chains. + + Parameters + ---------- + observed : Sequence + The observed data the model will be calibrated against + drivers : list or tuple + Sequence of driver datasets to be supplied, in order, to the + model's run function + draws : int + Number of samples to draw (on each chain); defaults to 1000 + chains : int + Number of chains; defaults to 3 + tune : str or None + Which hyperparameter to tune: Defaults to 'lambda', but can also + be 'scaling' or None. 
+        scaling : float
+            Initial scale factor for epsilon (Default: 1e-3)
+        prior : dict
+            Prior parameter assumptions, used to update `self.prior`
+            (Default: empty)
+        check_shape : bool
+            True to require that input driver datasets have the same shape as
+            the observed values (Default: False)
+        save_fig : bool
+            True to save figures to files instead of showing them
+            (Default: False)
+        var_names : Sequence
+            One or more variable names to show in the plot
+        '''
+        assert not check_shape or drivers[0].shape == observed.shape,\
+            'Driver data should have the same shape as the "observed" data'
+        assert len(drivers) == len(self.required_drivers[self.name]),\
+            'Did not receive expected number of driver datasets!'
+        assert tune in ('lambda', 'scaling') or tune is None
+        # Update prior assumptions
+        self.prior.update(prior)
+        # Generate an initial goodness-of-fit score
+        predicted = self.model([
+            self.params[p] for p in self.required_parameters[self.name]
+        ], *drivers)
+        if self.weights is not None:
+            score = np.sqrt(
+                np.nanmean(((predicted - observed) * self.weights) ** 2))
+        else:
+            score = np.sqrt(np.nanmean(((predicted - observed)) ** 2))
+        print('-- RMSD at the initial point: %.3f' % score)
+        print('Compiling model...')
+        try:
+            compiler = getattr(self, 'compile_%s_model' % self.name.lower())
+        except AttributeError:
+            raise AttributeError('''Could not find a compiler for model named
+            "%s"; make sure that a function "compile_%s_model()" is defined on
+            this class''' % (self.name, self.name))
+        with compiler(observed, drivers) as model:
+            with warnings.catch_warnings():
+                warnings.simplefilter('ignore')
+                step_func = pm.DEMetropolisZ(tune = tune, scaling = scaling)
+                trace = pm.sample(
+                    draws = draws, step = step_func, cores = chains,
+                    chains = chains, idata_kwargs = {'log_likelihood': True})
+            if self.backend is not None:
+                print('Writing results to file...')
+                trace.to_netcdf(self.backend)
+            if var_names is None:
+                az.plot_trace(trace, var_names = ['~log_likelihood'])
+            else:
+                az.plot_trace(trace, var_names = var_names)
+            if save_fig:
+                pyplot.savefig('.'.join(self.backend.split('.')[:-1]) + '.png')
+            else:
+                pyplot.show()
+
+
+class MOD17StochasticSampler(StochasticSampler):
+    '''
+    A Markov Chain-Monte Carlo (MCMC) sampler for MOD17. The specific sampler
+    used is the Differential Evolution (DE) MCMC algorithm described by
+    Ter Braak (2008), though the implementation is specific to the PyMC3
+    library.
+
+    Considerations:
+
+    1. Tower GPP is censored when values are < 0 or when APAR is
+       < 0.1 MJ m-2 d-1.
+
+    Parameters
+    ----------
+    config : dict
+        Dictionary of configuration parameters
+    model : Callable
+        The function to call (with driver data and parameters); this function
+        should take driver data as positional arguments and the model
+        parameters as a `*Sequence`; it should require no external state.
+ observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + params_dict : dict or None + Dictionary of model parameters, to be used as initial values and as + the basis for constructing a new dictionary of optimized parameters + backend : str or None + Path to a NetCDF4 file backend (Default: None) + weights : Sequence or None + Optional sequence of weights applied to the model residuals (as in + weighted least squares) + ''' + # NOTE: This is different than for mod17.MOD17 because we haven't yet + # figured out how the respiration terms are calculated + required_parameters = { + 'GPP': ['LUE_max', 'tmin0', 'tmin1', 'vpd0', 'vpd1'], + 'NPP': MOD17.required_parameters + } + required_drivers = { + 'GPP': ['fPAR', 'Tmin', 'VPD', 'PAR'], + 'NPP': ['fPAR', 'Tmin', 'VPD', 'PAR', 'LAI', 'Tmean', 'years'] + } + + def compile_gpp_model( + self, observed: Sequence, drivers: Sequence) -> pm.Model: + ''' + Creates a new GPP model based on the prior distribution. Model can be + re-compiled multiple times, e.g., for cross validation. + + Parameters + ---------- + observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + drivers : list or tuple + Sequence of driver datasets to be supplied, in order, to the + model's run function + + Returns + ------- + pm.Model + ''' + # Define the objective/ likelihood function + log_likelihood = BlackBoxLikelihood( + self.model, observed, x = drivers, weights = self.weights, + objective = self.config['optimization']['objective'].lower()) + # With this context manager, "all PyMC3 objects introduced in the indented + # code block...are added to the model behind the scenes." + with pm.Model() as model: + # (Stochstic) Priors for unknown model parameters + LUE_max = pm.TruncatedNormal('LUE_max', + **self.prior['LUE_max'], **self.bounds['LUE_max']) + # NOTE: tmin0, vpd0 are fixed at Collection 6.1 values + tmin0 = self.params['tmin0'] + tmin1 = pm.Uniform('tmin1', **self.bounds['tmin1']) + # NOTE: Upper bound on `vpd1` is set by the maximum observed VPD + vpd0 = self.params['vpd0'] + vpd1 = pm.Uniform('vpd1', + lower = self.bounds['vpd1']['lower'], + upper = drivers[2].max().round(0)) + # Convert model parameters to a tensor vector + params_list = [LUE_max, tmin0, tmin1, vpd0, vpd1] + params = at.as_tensor_variable(params_list) + # Key step: Define the log-likelihood as an added potential + pm.Potential('likelihood', log_likelihood(params)) + return model + + def compile_npp_model( + self, observed: Sequence, drivers: Sequence) -> pm.Model: + ''' + Creates a new NPP model based on the prior distribution. Model can be + re-compiled multiple times, e.g., for cross validation. 
+ + Parameters + ---------- + observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + drivers : list or tuple + Sequence of driver datasets to be supplied, in order, to the + model's run function + + Returns + ------- + pm.Model + ''' + # Define the objective/ likelihood function + log_likelihood = BlackBoxLikelihood( + self.model, observed, x = drivers, weights = self.weights, + objective = self.config['optimization']['objective'].lower()) + # With this context manager, "all PyMC3 objects introduced in the indented + # code block...are added to the model behind the scenes." + with pm.Model() as model: + # Setting GPP parameters that are known--EXCEPT tmin1 + LUE_max = self.params['LUE_max'] + tmin0 = self.params['tmin0'] + tmin1 = self.params['tmin1'] + vpd0 = self.params['vpd0'] + vpd1 = self.params['vpd1'] + # SLA fixed at prior mean + SLA = np.exp(self.prior['SLA']['mu']) + # Allometry ratios prescribe narrow range around Collection 6.1 values + froot_leaf_ratio = pm.Triangular( + 'froot_leaf_ratio', **self.prior['froot_leaf_ratio']) + # (Stochstic) Priors for unknown model parameters + Q10_froot = pm.TruncatedNormal( + 'Q10_froot', **self.prior['Q10_froot'], **self.bounds['Q10']) + leaf_mr_base = pm.LogNormal( + 'leaf_mr_base', **self.prior['leaf_mr_base']) + froot_mr_base = pm.LogNormal( + 'froot_mr_base', **self.prior['froot_mr_base']) + # For GRS and CRO, livewood mass and respiration are zero + if np.equal(list(self.prior['livewood_mr_base'].values()), [0, 0]).all(): + livewood_leaf_ratio = 0 + livewood_mr_base = 0 + Q10_livewood = 0 + else: + livewood_leaf_ratio = pm.Triangular( + 'livewood_leaf_ratio', **self.prior['livewood_leaf_ratio']) + livewood_mr_base = pm.LogNormal( + 'livewood_mr_base', **self.prior['livewood_mr_base']) + Q10_livewood = pm.TruncatedNormal( + 'Q10_livewood', **self.prior['Q10_livewood'], + **self.bounds['Q10']) + # Convert model parameters to a tensor vector + params_list = [ + LUE_max, tmin0, tmin1, vpd0, vpd1, SLA, + Q10_livewood, Q10_froot, froot_leaf_ratio, livewood_leaf_ratio, + leaf_mr_base, froot_mr_base, livewood_mr_base + ] + params = at.as_tensor_variable(params_list) + # Key step: Define the log-likelihood as an added potential + pm.Potential('likelihood', log_likelihood(params)) + return model + + +class CalibrationAPI(object): + ''' + Convenience class for calibrating the MOD17 GPP and NPP models. Meant to + be used with `fire.Fire()`. 
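+
+    Example command-line use (a sketch; assumes an entry point that calls
+    `fire.Fire(CalibrationAPI)`, per the protocol described at the top of
+    this module):
+
+        python calibration.py tune-gpp --pft=1 --draws=2000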
+ ''' + def __init__(self, config = None): + config_file = config + if config_file is None: + config_file = os.path.join( + MOD17_DIR, 'data/MOD17_calibration_config.json') + with open(config_file, 'r') as file: + self.config = json.load(file) + self.hdf5 = self.config['data']['file'] + + def _clean(self, raw: Sequence, drivers: Sequence, protocol: str = 'GPP'): + 'Cleans up data values according to a prescribed protocol' + if protocol == 'GPP': + # Filter out observed GPP values when GPP is negative or when + # APAR < 0.1 g C m-2 day-1 + apar = drivers['fPAR'] * drivers['PAR'] + return np.where( + apar < 0.1, np.nan, np.where(raw < 0, np.nan, raw)) + + def _filter(self, raw: Sequence, size: int): + 'Apply a smoothing filter with zero phase offset' + if size > 1: + window = np.ones(size) / size + return np.apply_along_axis( + lambda x: signal.filtfilt(window, np.ones(1), x), 0, raw) + return raw # Or, revert to the raw data + + def clean_observed( + self, raw: Sequence, drivers: Sequence, driver_labels: Sequence, + protocol: str = 'GPP', filter_length: int = 2) -> Sequence: + ''' + Cleans observed tower flux data according to a prescribed protocol. + + - For GPP data: Removes observations where GPP < 0 or where APAR is + < 0.1 MJ m-2 day-1 + + Parameters + ---------- + raw : Sequence + drivers : Sequence + driver_labels : Sequence + protocol : str + filter_length : int + The window size for the smoothing filter, applied to the observed + data + + Returns + ------- + Sequence + ''' + if protocol != 'GPP': + raise NotImplementedError('"protocol" must be one of: "GPP"') + # Read in the observed data and apply smoothing filter + obs = self._filter(raw, filter_length) + obs = self._clean(obs, dict([ + (driver_labels[i], data) + for i, data in enumerate(drivers) + ]), protocol = 'GPP') + return obs + + def export_bplut( + self, model: str, output_path: str, thin: int = 10, + burn: int = 1000): + ''' + Export the BPLUT using the posterior mean from the MCMC sampler. NOTE: + The posterior mean is usually not the best estimate for poorly + identified parameters. + + Parameters + ---------- + model : str + The name of the model ("GPP" or "NPP") + output_path : str + The output CSV file path + thin : int + Thinning rate + burn : int + The burn-in (i.e., first N samples to discard) + ''' + params_dict = restore_bplut(self.config['BPLUT'][model]) + bplut = params_dict.copy() + # Filter the parameters to just those for the PFT of interest + for pft in PFT_VALID: + backend = self.config['optimization']['backend_template'] %\ + (model, pft) + params = dict([(k, v[pft]) for k, v in params_dict.items()]) + sampler = MOD17StochasticSampler( + self.config, getattr(MOD17, '_%s' % model.lower()), params, + backend = backend) + trace = sampler.get_trace() + fit = trace.sel( + draw = slice(burn, None, thin))['posterior'].mean() + for each in MOD17.required_parameters: + try: + bplut[each][pft] = float(fit[each]) + except KeyError: + continue + write_bplut(bplut, output_path) + + def export_posterior( + self, model: str, param: str, output_path: str, thin: int = 10, + burn: int = 1000, k_folds: int = 1): + ''' + Exports posterior distribution for a parameter, for each PFT to HDF5. 
+
+        Parameters
+        ----------
+        model : str
+            The name of the model ("GPP" or "NPP")
+        param : str
+            The model parameter to export
+        output_path : str
+            The output HDF5 file path
+        thin : int
+            Thinning rate
+        burn : int
+            The burn-in (i.e., first N samples to discard)
+        k_folds : int
+            The number of k-folds used in cross-calibration/validation;
+            if more than one (default), the folds for each PFT will be
+            combined into a single HDF5 file
+        '''
+        params_dict = restore_bplut(self.config['BPLUT'][model])
+        bplut = params_dict.copy()
+        # Filter the parameters to just those for the PFT of interest
+        post = []
+        for pft in PFT_VALID:
+            params = dict([(k, v[pft]) for k, v in params_dict.items()])
+            backend = self.config['optimization']['backend_template'] %\
+                (model, pft)
+            post_by_fold = []
+            for fold in range(1, k_folds + 1):
+                if k_folds > 1:
+                    backend = self.config['optimization']['backend_template'] %\
+                        (f'{model}-k{fold}', pft)
+                sampler = MOD17StochasticSampler(
+                    self.config, getattr(MOD17, '_%s' % model.lower()), params,
+                    backend = backend)
+                trace = sampler.get_trace()
+                fit = trace.sel(draw = slice(burn, None, thin))['posterior']
+                num_samples = fit.sizes['chain'] * fit.sizes['draw']
+                if param in fit:
+                    post_by_fold.append(
+                        az.extract_dataset(fit, combined = True)[param].values)
+                else:
+                    # In case there is, e.g., a parameter that takes on a
+                    #   constant value for a specific PFT
+                    if k_folds > 1:
+                        post_by_fold.append(np.ones((1, num_samples)) * np.nan)
+                    else:
+                        post_by_fold.append(np.ones(num_samples) * np.nan)
+            if k_folds > 1:
+                post.append(np.vstack(post_by_fold))
+            else:
+                post.extend(post_by_fold)
+        # If not every PFT's posterior has the same number of samples (e.g.,
+        #   when one set of chains was run longer than another)...
+        if not all([p.shape == post[0].shape for p in post]):
+            max_len = max([p.shape for p in post])[0]
+            # ...Reshape all posteriors to match the greatest sample size
+            post = [
+                np.pad(
+                    p.astype(np.float32), (0, max_len - p.size),
+                    mode = 'constant', constant_values = (np.nan,))
+                for p in post
+            ]
+        with h5py.File(output_path, 'a') as hdf:
+            post = np.stack(post)
+            ts = datetime.date.today().strftime('%Y-%m-%d') # Today's date
+            dataset = hdf.create_dataset(
+                f'{param}_posterior', post.shape, np.float32, post)
+            dataset.attrs['description'] = f'CalibrationAPI.export_posterior() on {ts}'
+
+    def export_likely_posterior(
+            self, model: str, param: str, output_path: str, thin: int = 10,
+            burn: int = 1000, ptile: int = 95):
+        '''
+        Exports posterior distribution for a parameter based on likelihood.
+
+        Parameters
+        ----------
+        model : str
+            The name of the model ("GPP" or "NPP")
+        param : str
+            The model parameter to export
+        output_path : str
+            The output HDF5 file path
+        thin : int
+            Thinning rate
+        burn : int
+            The burn-in (i.e., first N samples to discard)
+        ptile : int
+            The percentile cutoff for likelihood; only samples at or above
+            this likelihood cutoff will be included
+        '''
+        params_dict = restore_bplut(self.config['BPLUT'][model])
+        bplut = params_dict.copy()
+        # Filter the parameters to just those for the PFT of interest
+        post = []
+        likelihood = []
+        for pft in PFT_VALID:
+            backend = self.config['optimization']['backend_template'] % (model, pft)
+            params = dict([(k, v[pft]) for k, v in params_dict.items()])
+            sampler = MOD17StochasticSampler(
+                self.config, getattr(MOD17, '_%s' % model.lower()), params,
+                backend = backend)
+            trace = sampler.get_trace()
+            fit = trace.sel(draw = slice(burn, None, thin))
+            # Find the likelihood value associated with the cutoff percentile
+            ll = az.extract_dataset(
+                fit, combined = True)['log_likelihood'].values
+            values = az.extract_dataset(fit, combined = True)[param].values
+            cutoff = np.percentile(ll, ptile)
+            post.append(values[ll >= cutoff])
+            likelihood.append(ll[ll >= cutoff])
+        with h5py.File(output_path, 'a') as hdf:
+            n = max([len(p) for p in post])
+            # Make sure all arrays are the same size
+            post = np.stack([
+                np.concatenate((p, np.full((n - len(p),), np.nan)))
+                for p in post
+            ])
+            likelihood = np.stack([
+                np.concatenate((p, np.full((n - len(p),), np.nan)))
+                for p in likelihood
+            ])
+            ts = datetime.date.today().strftime('%Y-%m-%d') # Today's date
+            dataset = hdf.create_dataset(
+                f'{param}_posterior', post.shape, np.float32, post)
+            dataset.attrs['description'] = f'CalibrationAPI.export_likely_posterior() on {ts}'
+            dataset = hdf.create_dataset(
+                f'{param}_likelihood', likelihood.shape, np.float32, likelihood)
+
+    def tune_gpp(
+            self, pft: int, plot_trace: bool = False, ipdb: bool = False,
+            save_fig: bool = False, **kwargs):
+        '''
+        Run the MOD17 GPP calibration.
+
+        Parameters
+        ----------
+        pft : int
+            The Plant Functional Type (PFT) to calibrate
+        plot_trace : bool
+            True to plot the trace for a previous calibration run; this will
+            also NOT start a new calibration (Default: False)
+        ipdb : bool
+            True to drop the user into an ipdb prompt, prior to and instead of
+            running calibration
+        save_fig : bool
+            True to save figures to files instead of showing them
+            (Default: False)
+        **kwargs
+            Additional keyword arguments passed to
+            `MOD17StochasticSampler.run()`
+        '''
+        assert pft in PFT_VALID, f'Invalid PFT: {pft}'
+        # Pass configuration parameters to MOD17StochasticSampler.run()
+        for key in ('chains', 'draws', 'tune', 'scaling'):
+            if key in self.config['optimization'].keys() and not key in kwargs.keys():
+                kwargs[key] = self.config['optimization'][key]
+        # Filter the parameters to just those for the PFT of interest
+        params_dict = restore_bplut(self.config['BPLUT']['GPP'])
+        # Load blacklisted sites (if any)
+        blacklist = self.config['data']['sites_blacklisted']
+        params_dict = dict([(k, v[pft]) for k, v in params_dict.items()])
+        model = MOD17(params_dict)
+        objective = self.config['optimization']['objective'].lower()
+
+        print('Loading driver datasets...')
+        with h5py.File(self.hdf5, 'r') as hdf:
+            sites = hdf['FLUXNET/site_id'][:]
+            if hasattr(sites[0], 'decode'):
+                sites = list(map(lambda x: x.decode('utf-8'), sites))
+            # Get dominant PFT
+            pft_map = pft_dominant(hdf['state/PFT'][:], site_list = sites)
+            # Blacklist various sites
+            pft_mask = np.logical_and(pft_map == pft, ~np.in1d(sites, blacklist))
+            # NOTE: Converting from Kelvin to Celsius
+            tday = hdf['MERRA2/T10M_daytime'][:][:,pft_mask] - 273.15
+            qv10m = hdf['MERRA2/QV10M_daytime'][:][:,pft_mask]
+            ps = hdf['MERRA2/PS_daytime'][:][:,pft_mask]
+            drivers = [ # fPAR, Tmin, VPD, PAR
+                hdf['MODIS/MOD15A2HGF_fPAR_interp'][:][:,pft_mask],
+                hdf['MERRA2/Tmin'][:][:,pft_mask] - 273.15,
+                MOD17.vpd(qv10m, ps, tday),
+                MOD17.par(hdf['MERRA2/SWGDN'][:][:,pft_mask]),
+            ]
+            # Convert fPAR from (%) to [0,1]
+            drivers[0] = np.nanmean(drivers[0], axis = -1) / 100
+            # If RMSE is used, then we want to pay attention to weighting
+            weights = None
+            if objective in ('rmsd', 'rmse'):
+                weights = hdf['weights'][pft_mask][np.newaxis,:]\
+                    .repeat(tday.shape[0], axis = 0)
+            # Check that driver data do not contain NaNs
+            for d, each in enumerate(drivers):
+                name = ('fPAR', 'Tmin', 'VPD',
+
+    def tune_npp(
+            self, pft: int, plot_trace: bool = False, ipdb: bool = False,
+            save_fig: bool = False, climatology = False,
+            cutoff: Number = 2385, k_folds: int = 1, **kwargs):
+        r'''
+        Run the MOD17 NPP calibration. If k-folds cross-validation is used,
+        the model is calibrated on $k$ random subsets of the data and a
+        series of files is created, e.g.:
+
+            MOD17_NPP_calibration_PFT1.h5
+            MOD17_NPP-k1_calibration_PFT1.nc4
+            MOD17_NPP-k2_calibration_PFT1.nc4
+            ...
+
+        where each `.nc4` file is a standard `arviz` backend and the `.h5`
+        file indicates which indices from the NPP observations vector, after
+        removing NaNs, were excluded (i.e., the indices of the test data).
+
+        Parameters
+        ----------
+        pft : int
+            The Plant Functional Type (PFT) to calibrate
+        plot_trace : bool
+            True to display the trace plot ONLY and not run calibration
+            (Default: False)
+        ipdb : bool
+            True to drop into an interactive Python debugger (`ipdb`) after
+            loading an existing trace (Default: False)
+        save_fig : bool
+            True to save the post-calibration trace plot to a file instead of
+            displaying it (Default: False)
+        climatology : bool
+            True to use a MERRA-2 climatology (and look for it in the drivers
+            file), i.e., use the `MERRA2_climatology` group instead of the
+            `surface_met_MERRA2` group (Default: False)
+        cutoff : Number
+            Maximum value of observed NPP (g C m-2 year-1); values above this
+            cutoff will be discarded and not used in calibration
+            (Default: 2385)
+        k_folds : int
+            Number of folds to use in k-folds cross-validation; defaults to
+            k=1, i.e., no cross-validation is performed.
+        **kwargs
+            Additional keyword arguments passed to
+            `MOD17StochasticSampler.run()`
+        '''
+        assert pft in PFT_VALID, f'Invalid PFT: {pft}'
+        prefix = 'MERRA2_climatology' if climatology else 'surface_met_MERRA2'
+        params_dict = restore_bplut(self.config['BPLUT']['NPP'])
+        # Filter the parameters to just those for the PFT of interest
+        params_dict = dict([(k, v[pft]) for k, v in params_dict.items()])
+        model = MOD17(params_dict)
+        kwargs.update({'var_names': [
+            '~LUE_max', '~tmin0', '~tmin1', '~vpd0', '~vpd1', '~log_likelihood'
+        ]})
+        # Pass configuration parameters to MOD17StochasticSampler.run()
+        for key in ('chains', 'draws', 'tune', 'scaling'):
+            if key in self.config['optimization'].keys():
+                kwargs[key] = self.config['optimization'][key]
+        print('Loading driver datasets...')
+        with h5py.File(self.hdf5, 'r') as hdf:
+            # NOTE: This is only recorded at the site level; no need to
+            # determine the modal PFT across the subgrid
+            pft_map = hdf['NPP/PFT'][:]
+            # Leave out sites where there is no fPAR (and no LAI) data
+            fpar = hdf['NPP/MOD15A2H_fPAR_clim_filt'][:]
+            mask = np.logical_and(
+                pft_map == pft, ~np.isnan(np.nanmean(fpar, axis = -1))\
+                    .all(axis = 0))
+            # NOTE: Converting from Kelvin to Celsius
+            tday = hdf[f'NPP/{prefix}/T10M_daytime'][:][:,mask] - 273.15
+            qv10m = hdf[f'NPP/{prefix}/QV10M_daytime'][:][:,mask]
+            ps = hdf[f'NPP/{prefix}/PS_daytime'][:][:,mask]
+            drivers = [ # fPAR, Tmin, VPD, PAR, LAI, Tmean, years
+                hdf['NPP/MOD15A2H_fPAR_clim_filt'][:][:,mask],
+                hdf[f'NPP/{prefix}/Tmin'][:][:,mask] - 273.15,
+                MOD17.vpd(qv10m, ps, tday),
+                MOD17.par(hdf[f'NPP/{prefix}/SWGDN'][:][:,mask]),
+                hdf['NPP/MOD15A2H_LAI_clim_filt'][:][:,mask],
+                hdf[f'NPP/{prefix}/T10M'][:][:,mask] - 273.15,
+                np.full(ps.shape, 1) # i.e., a 365-day climatological year ("Year 1")
+            ]
+            observed_npp = hdf['NPP/NPP_total'][:][mask]
+        if cutoff is not None:
+            observed_npp[observed_npp > cutoff] = np.nan
+        # Set negative VPD to zero
+        drivers[2] = np.where(drivers[2] < 0, 0, drivers[2])
+        # Convert fPAR from (%) to [0,1] and re-scale LAI; reshape fPAR, LAI
+        drivers[0] = np.nanmean(drivers[0], axis = -1) * 0.01
+        drivers[4] = np.nanmean(drivers[4], axis = -1) * 0.1
+        # Mask out driver data where there are no observations
+        for d, _ in enumerate(drivers):
+            drivers[d] = drivers[d][:,~np.isnan(observed_npp)]
+        observed_npp = observed_npp[~np.isnan(observed_npp)]
+
+        if k_folds > 1:
+            # Back up the original (complete) datasets
+            _drivers = [d.copy() for d in drivers]
+            _observed_npp = observed_npp.copy()
+            # Randomize the indices of the NPP data
+            indices = np.arange(0, observed_npp.size)
+            np.random.shuffle(indices)
+            # Get the starting and ending index of each fold
+            fold_idx = np.array([indices.size // k_folds] * k_folds) *\
+                np.arange(0, k_folds)
+            fold_idx = list(map(
+                list, zip(fold_idx, fold_idx + indices.size // k_folds)))
+            # Ensure that the entire dataset is used (slice ends are
+            # exclusive, so the last fold must end at indices.size)
+            fold_idx[-1][-1] = indices.size
+            idx_test = [indices[start:end] for start, end in fold_idx]
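+            # e.g., with 10 observations and k_folds = 3, fold_idx would be
+            # [[0, 3], [3, 6], [6, 10]]; the last fold absorbs any remainder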
+
+        # Loop over each fold (or the entire dataset, if num. folds == 1)
+        for k, fold in enumerate(range(1, k_folds + 1)):
+            backend = self.config['optimization']['backend_template'] %\
+                ('NPP', pft)
+            if k_folds > 1 and fold == 1:
+                # Create an HDF5 file with the same name as the (original)
+                # netCDF4 back-end, store the test indices
+                with h5py.File(backend.replace('nc4', 'h5'), 'w') as hdf:
+                    out = list(idx_test)
+                    size = indices.size // k_folds
+                    try:
+                        out = np.stack(out)
+                    except ValueError:
+                        size = max((o.size for o in out))
+                        for i in range(0, len(out)):
+                            out[i] = np.concatenate(
+                                (out[i], [np.nan] * (size - out[i].size)))
+                    hdf.create_dataset(
+                        'test_indices', (k_folds, size), np.int32, np.stack(out))
+            if k_folds > 1:
+                # Restore the original NPP dataset
+                observed_npp = _observed_npp.copy()
+                # Set to NaN all the test indices
+                idx = idx_test[k]
+                observed_npp[idx] = np.nan
+                # Same for drivers, after restoring from the original
+                drivers = [d.copy()[:,~np.isnan(observed_npp)] for d in _drivers]
+                observed_npp = observed_npp[~np.isnan(observed_npp)]
+                # Use a different naming scheme for the back-end
+                backend = self.config['optimization']['backend_template'] %\
+                    (f'NPP-k{fold}', pft)
+
+            print('Initializing sampler...')
+            sampler = MOD17StochasticSampler(
+                self.config, MOD17._npp, params_dict, backend = backend,
+                model_name = 'NPP')
+            if plot_trace or ipdb:
+                if ipdb:
+                    import ipdb
+                    ipdb.set_trace()
+                trace = sampler.get_trace()
+                az.plot_trace(
+                    trace, var_names = MOD17.required_parameters[5:])
+                pyplot.show()
+                return
+            # Get (informative) priors for just those parameters that have them
+            with open(self.config['optimization']['prior'], 'r') as file:
+                prior = json.load(file)
+            prior_params = filter(
+                lambda p: p in prior.keys(), sampler.required_parameters['NPP'])
+            prior = dict([
+                (p, prior[p]) for p in prior_params
+            ])
+            for key in prior.keys():
+                # And make sure to subset to the chosen PFT!
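+                # (each entry in the prior file is an array indexed by PFT
+                # code, with nulls for invalid PFTs)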
+ for arg in prior[key].keys(): + prior[key][arg] = prior[key][arg][pft] + sampler.run( + observed_npp, drivers, prior = prior, save_fig = save_fig, + **kwargs) + + +if __name__ == '__main__': + import fire + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + fire.Fire(CalibrationAPI) diff --git a/mod17/data/MOD17_BPLUT_C5.1_MERRA_NASA.csv b/mod17/data/MOD17_BPLUT_C5.1_MERRA_NASA.csv new file mode 100644 index 0000000..865b401 --- /dev/null +++ b/mod17/data/MOD17_BPLUT_C5.1_MERRA_NASA.csv @@ -0,0 +1,13 @@ +UMD_VEG_LC,ENF=0,EBF=1,DNF=2,DBF=3,MF=4,CShrub=5,OShrub=6,WSavannas=7,Savannas=8, Grass=9,Crop=10 +LUEmax(KgC/m^2/d/MJ),0.001211,0.001405,0.001227,0.001526,0.001226,0.001495,0.001027,0.001498,0.001454,0.001215,0.0013 +Tmin_min(C),-8,-8,-8,-6,-7,-8,-8,-8,-8,-8,-8 +Tmin_max(C),8.31,9.09,10.44,9.94,9.5,8.61,8.8,11.39,11.39,12.02,12.02 +VPD_min(Pa),650,1000,650,650,650,650,650,650,650,650,650 +VPD_max(Pa),3000,4000,3500,2900,2900,4300,4400,3500,3600,4200,4500 +SLA(LAI/KgC),15,26.9,16.9,24.7,22.6,9.4,12,28.8,28.9,38,38 +Q10(unitless),2,2,2,2,2,2,2,2,2,2,2 +froot_leaf_ratio,1.2,1.1,1.7,1.1,1.1,1,1.3,1.8,1.8,2.6,2 +livewood_leaf_ratio,0.182,0.162,0.165,0.203,0.203,0.079,0.04,0.091,0.051,0,0 +leaf_mr_base(Kgc/KgC/d),0.00604,0.00604,0.00815,0.00778,0.00778,0.00869,0.00519,0.00869,0.00869,0.0098,0.0098 +froot_mr_base(Kgc/KgC/d),0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00819,0.00819 +livewood_mr_base(Kgc/KgC/d),0.00397,0.00397,0.00397,0.00371,0.00371,0.00436,0.00218,0.00312,0.001,0,0 diff --git a/mod17/data/MOD17_BPLUT_CX.X_MERRA_NASA.csv b/mod17/data/MOD17_BPLUT_CX.X_MERRA_NASA.csv new file mode 100644 index 0000000..24503a1 --- /dev/null +++ b/mod17/data/MOD17_BPLUT_CX.X_MERRA_NASA.csv @@ -0,0 +1,14 @@ +UMD_VEG_LC,ENF=0,EBF=1,DNF=2,DBF=3,MF=4,CShrub=5,OShrub=6,WSavannas=7,Savannas=8, Grass=9,Crop=10 +LUEmax(KgC/m^2/d/MJ),0.001211,0.001405,0.001227,0.001526,0.001226,0.001495,0.001027,0.001498,0.001454,0.001215,0.0013 +Tmin_min(C),-8,-8,-8,-6,-7,-8,-8,-8,-8,-8,-8 +Tmin_max(C),8.31,9.09,10.44,9.94,9.5,8.61,8.8,11.39,11.39,12.02,12.02 +VPD_min(Pa),650,1000,650,650,650,650,650,650,650,650,650 +VPD_max(Pa),3000,4000,3500,2900,2900,4300,4400,3500,3600,4200,4500 +SLA(LAI/KgC),15,26.9,16.9,24.7,22.6,9.4,12,28.8,28.9,38,38 +Q10_livewood(unitless),2,2,2,2,2,2,2,2,2,2,2 +Q10_froot(unitless),2,2,2,2,2,2,2,2,2,2,2 +froot_leaf_ratio,1.2,1.1,1.7,1.1,1.1,1,1.3,1.8,1.8,2.6,2 +livewood_leaf_ratio,0.182,0.162,0.165,0.203,0.203,0.079,0.04,0.091,0.051,0,0 +leaf_mr_base(Kgc/KgC/d),0.00604,0.00604,0.00815,0.00778,0.00778,0.00869,0.00519,0.00869,0.00869,0.0098,0.0098 +froot_mr_base(Kgc/KgC/d),0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00819,0.00819 +livewood_mr_base(Kgc/KgC/d),0.00397,0.00397,0.00397,0.00371,0.00371,0.00436,0.00218,0.00312,0.001,0,0 diff --git a/mod17/data/MOD17_BPLUT_prior_20220525.json b/mod17/data/MOD17_BPLUT_prior_20220525.json new file mode 100644 index 0000000..6ec4e9e --- /dev/null +++ b/mod17/data/MOD17_BPLUT_prior_20220525.json @@ -0,0 +1,82 @@ +{ + "LUE_max": { + "mu": [ + null, 0.00098, 0.0014 , 0.00123, 0.00168, 0.00143, 0.0008 , + 0.00074, 0.00093, 0.00093, 0.00119, null, 0.00194 + ], + "sigma": [ + null, 0.00032, 0.0002 , 0.0002 , 0.00035, 0.00037, 0.00038, + 0.00021, 0.00037, 0.00038, 0.00045, null, 0.00055 + ] + }, + "SLA": { + "mu": [ + null, 2.25, 2.68, 2.26, 2.95, 2.97, 2.66, + 2.62, 2.78, 2.76, 2.82, null, 2.98 + ], + "sigma": [ + null, 0.74, 0.45, 0.73, 0.55, 0.42, 0.53, + 0.56, 0.62, 0.57, 0.51, null, 
0.50 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "mu": [ + null, 0.009, 0.009, 0.009, -0.013, -0.014, -0.084, + -0.084, 0.009, 0.009, 0.075, null, 0.009 + ], + "sigma": [ + null, 0.216, 0.216, 0.216, 0.067, 0.124, 0.156, + 0.156, 0.216, 0.216, 0.333, null, 0.216 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, + "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/MOD17_BPLUT_prior_20220611.json b/mod17/data/MOD17_BPLUT_prior_20220611.json new file mode 100644 index 0000000..6b24647 --- /dev/null +++ b/mod17/data/MOD17_BPLUT_prior_20220611.json @@ -0,0 +1,82 @@ +{ + "LUE_max": { + "mu": [ + null, 0.00098, 0.0014 , 0.00123, 0.00168, 0.00143, 0.0008 , + 0.00074, 0.00093, 0.00093, 0.00119, null, 0.00194 + ], + "sigma": [ + null, 0.00032, 0.0002 , 0.0002 , 0.00035, 0.00037, 0.00038, + 0.00021, 0.00037, 0.00038, 0.00045, null, 0.00055 + ] + }, + "SLA": { + "mu": [ + null, 2.25, 2.68, 2.26, 2.95, 2.97, 2.66, + 2.62, 2.78, 2.76, 2.82, null, 2.98 + ], + "sigma": [ + null, 0.74, 0.45, 0.73, 0.55, 0.42, 0.53, + 0.56, 0.62, 0.57, 0.51, null, 0.50 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "mu": [ + null, 1.2, 1.1, 1.7, 1.1, 1.1, 1.0, + 1.3, 1.8, 1.8, 2.6,null, 2.0 + ], + "sigma": [ + null, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, 0.5,null, 0.5 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, + "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/MOD17_BPLUT_prior_20220621.json 
b/mod17/data/MOD17_BPLUT_prior_20220621.json new file mode 100644 index 0000000..6d9a149 --- /dev/null +++ b/mod17/data/MOD17_BPLUT_prior_20220621.json @@ -0,0 +1,83 @@ +{ + "description": "Final prior for GPP and NPP models, where SLA is based on TRY data in C units and froot_leaf_ratio is fixed at C6.1 values", + "LUE_max": { + "mu": [ + null, 0.00098, 0.0014 , 0.00123, 0.00168, 0.00143, 0.0008 , + 0.00074, 0.00093, 0.00093, 0.00119, null, 0.00194 + ], + "sigma": [ + null, 0.00032, 0.0002 , 0.0002 , 0.00035, 0.00037, 0.00038, + 0.00021, 0.00037, 0.00038, 0.00045, null, 0.00055 + ] + }, + "SLA": { + "mu": [ + null, 3.03, 3.21, 3.77, 3.62, 3.56, 3.38, + 3.38, 3.60, 3.58, 3.59, null, 3.72 + ], + "sigma": [ + null, 0.958, 0.452, 0.204, 0.623, 0.580, 0.506, + 0.506, 0.696, 0.526, 0.542, null, 0.596 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "mu": [ + null, 1.2, 1.1, 1.7, 1.1, 1.1, 1.0, + 1.3, 1.8, 1.8, 2.6,null, 2.0 + ], + "sigma": [ + null, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, 0.5,null, 0.5 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, + "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/MOD17_BPLUT_prior_20221027.json b/mod17/data/MOD17_BPLUT_prior_20221027.json new file mode 100644 index 0000000..c5dd031 --- /dev/null +++ b/mod17/data/MOD17_BPLUT_prior_20221027.json @@ -0,0 +1,101 @@ +{ + "description": "Adjusted SLA for Shrubs; added Triangular priors for allometric ratios; SLA is based on TRY data in C units and froot_leaf_ratio is fixed at C6.1 values", + "LUE_max": { + "mu": [ + null, 0.00098, 0.0014 , 0.00123, 0.00168, 0.00143, 0.0008 , + 0.00074, 0.00093, 0.00093, 0.00119, null, 0.00194 + ], + "sigma": [ + null, 0.00032, 0.0002 , 0.0002 , 0.00035, 0.00037, 0.00038, + 0.00021, 0.00037, 0.00038, 0.00045, null, 0.00055 + ] + }, + "SLA": { + "mu": [ + null, 3.03, 3.21, 3.77, 3.62, 3.56, 3.21, + 3.21, 3.60, 3.58, 3.59, null, 3.72 + ], + "sigma": [ + null, 0.958, 0.452, 0.204, 0.623, 0.580, 0.310, + 0.310, 0.696, 0.526, 0.542, null, 0.596 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "c": [ + null, 1.2, 1.1, 1.7, 1.1, 
1.1, 1.0, + 1.3, 1.8, 1.8, 2.6,null, 2.0 + ], + "lower": [ + null, 0.8, 0.7, 1.3, 0.7, 0.7, 0.6, + 0.9, 1.4, 1.4, 2.2,null, 1.8 + ], + "upper": [ + null, 1.6, 1.5, 2.1, 1.5, 1.5, 1.4, + 1.7, 2.2, 2.2, 3.0,null, 2.4 + ] + }, + "livewood_leaf_ratio": { + "c": [ + null, 0.182, 0.162, 0.165, 0.203, 0.203, 0.079, + 0.040, 0.091, 0.051, 0.0 , null, 0.0 + ], + "lower": [ + null, 0.142, 0.122, 0.125, 0.163, 0.163, 0.039, + 0.001, 0.051, 0.011, 0.0 , null, 0.0 + ], + "upper": [ + null, 0.222, 0.202, 0.205, 0.243, 0.243, 0.199, + 0.080, 0.131, 0.091, 0.0 , null, 0.0 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, + "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/MOD17_BPLUT_prior_20221223.json b/mod17/data/MOD17_BPLUT_prior_20221223.json new file mode 100644 index 0000000..d331e7c --- /dev/null +++ b/mod17/data/MOD17_BPLUT_prior_20221223.json @@ -0,0 +1,101 @@ +{ + "description": "Fixed issue with SLA prior means, std. dev. propagated from TRY; Adjusted SLA for Shrubs; added Triangular priors for allometric ratios; SLA is based on TRY data in C units and froot_leaf_ratio is fixed at C6.1 values", + "LUE_max": { + "mu": [ + null, 0.00098, 0.0014 , 0.00123, 0.00168, 0.00143, 0.0008 , + 0.00074, 0.00093, 0.00093, 0.00119, null, 0.00194 + ], + "sigma": [ + null, 0.00032, 0.0002 , 0.0002 , 0.00035, 0.00037, 0.00038, + 0.00021, 0.00037, 0.00038, 0.00045, null, 0.00055 + ] + }, + "SLA": { + "mu": [ + null, 2.75, 3.21, 3.21, 3.61, 3.56, 3.21, + 3.21, 3.60, 3.58, 3.60, null, 3.72 + ], + "sigma": [ + null, 0.779, 0.452, 0.273, 0.626, 0.581, 0.310, + 0.310, 0.696, 0.526, 0.543, null, 0.599 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "c": [ + null, 1.2, 1.1, 1.7, 1.1, 1.1, 1.0, + 1.3, 1.8, 1.8, 2.6,null, 2.0 + ], + "lower": [ + null, 0.8, 0.7, 1.3, 0.7, 0.7, 0.6, + 0.9, 1.4, 1.4, 2.2,null, 1.8 + ], + "upper": [ + null, 1.6, 1.5, 2.1, 1.5, 1.5, 1.4, + 1.7, 2.2, 2.2, 3.0,null, 2.4 + ] + }, + "livewood_leaf_ratio": { + "c": [ + null, 0.182, 0.162, 0.165, 0.203, 0.203, 0.079, + 0.040, 0.091, 0.051, 0.0 , null, 0.0 + ], + "lower": [ + null, 0.142, 0.122, 0.125, 0.163, 0.163, 0.039, + 0.001, 0.051, 0.011, 0.0 , null, 0.0 + ], + "upper": [ + null, 0.222, 0.202, 0.205, 0.243, 0.243, 0.199, + 0.080, 0.131, 0.091, 0.0 , null, 0.0 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, 
+ "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/MOD17_calibration_config.json b/mod17/data/MOD17_calibration_config.json new file mode 100644 index 0000000..de3d510 --- /dev/null +++ b/mod17/data/MOD17_calibration_config.json @@ -0,0 +1,38 @@ +{ + "description": "Calibration against MERRA-2 1980-2000 synthetic year and using priors with: Fixed froot_leaf_ratio, SLA based on carbon units", + "BPLUT": { + "GPP": "/home/arthur.endsley/src/mod17/lib/python/mod17/data/MOD17_BPLUT_CX.X_MERRA_NASA.csv", + "NPP": "/home/arthur.endsley/DATA/20220503_MOD17_GPP_recal_BPLUT.csv" + }, + "data": { + "file": "/anx_lagr4/MODIS_VIIRS/calibration/VIIRS_MOD16_MOD17_tower_site_drivers_v9.h5", + "sites_blacklisted": [ + "CN-Do1", "CN-Do2", "CN-Do3", "IT-Vig", "NL-Hor", "US-ORv", "US-WPT" + ] + }, + "optimization": { + "backend_template": "/home/arthur.endsley/20221027_MOD17_%s_calibration_PFT%d.nc4", + "prior": "/usr/local/dev/mod17/lib/python/mod17/data/MOD17_BPLUT_prior_20221027.json", + "chains": 3, + "draws": 100000, + "tune": "scaling", + "scaling": 1e-3, + "objective": "RMSE", + "bounds": { + "LUE_max": [0, 0.005], + "tmin0": [-35, 0], + "tmin1": [0, 25], + "vpd0": [0, 1000], + "vpd1": [1000, 8000], + "SLA": [1, 200], + "Q10": [0, 10], + "Q10_froot": [0, 10], + "Q10_livewood": [0, 10], + "froot_leaf_ratio": [0, 4], + "livewood_leaf_ratio": [0.01, 1.00], + "leaf_mr_base": [0, 0.1], + "froot_mr_base": [0, 0.1], + "livewood_mr_base": [0, 0.1] + } + } +} diff --git a/mod17/data/VNP17_BPLUT_prior_20220706.json b/mod17/data/VNP17_BPLUT_prior_20220706.json new file mode 100644 index 0000000..18f1c22 --- /dev/null +++ b/mod17/data/VNP17_BPLUT_prior_20220706.json @@ -0,0 +1,121 @@ +{ + "description": "Initial prior for VNP17 GPP and NPP models, extending 20220621 prior, but informed by MOD17 calibration", + "LUE_max": { + "mu": [ + null, 0.00105, 0.00133, 0.00125, 0.00158, 0.00140, 0.00083, + 0.00075, 0.00096, 0.00106, 0.00126, null, 0.00181 + ], + "sigma": [ + null, 0.00027, 0.00017, 0.00019, 0.00030, 0.00031, 0.00035, + 0.00021, 0.00030, 0.00031, 0.00037, null, 0.00044 + ] + }, + "tmin1": { + "mu": [ + null, 11.2, 13.1, 11.5, 14.7, 12.8, 12.8, + 12.2, 12.7, 11.8, 11.9, null, 13.8 + ], + "sigma": [ + null, 7.00, 7.17, 6.89, 6.89, 7.01, 7.17, + 7.07, 7.04, 7.03, 7.08, null, 7.16 + ] + }, + "vpd1": { + "mu": [ + null, 3566, 3516, 2098, 3124, 2979, 3438, + 4193, 3521, 3897, 4004, null, 4273 + ], + "sigma": [ + null, 1190, 1094, 463, 949, 975, 1292, + 1629, 1268, 1353, 1533, null, 1529 + ] + }, + "SLA": { + "mu": [ + null, 2.75, 3.21, 3.21, 3.61, 3.56, 3.21, + 3.21, 3.60, 3.58, 3.60, null, 3.72 + ], + "sigma": [ + null, 0.779, 0.452, 0.273, 0.626, 0.581, 0.310, + 0.310, 0.696, 0.526, 0.543, null, 0.599 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 
1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "c": [ + null, 1.2, 1.1, 1.7, 1.1, 1.1, 1.0, + 1.3, 1.8, 1.8, 2.6,null, 2.0 + ], + "lower": [ + null, 0.8, 0.7, 1.3, 0.7, 0.7, 0.6, + 0.9, 1.4, 1.4, 2.2,null, 1.8 + ], + "upper": [ + null, 1.6, 1.5, 2.1, 1.5, 1.5, 1.4, + 1.7, 2.2, 2.2, 3.0,null, 2.4 + ] + }, + "livewood_leaf_ratio": { + "c": [ + null, 0.182, 0.162, 0.165, 0.203, 0.203, 0.079, + 0.040, 0.091, 0.051, 0.0 , null, 0.0 + ], + "lower": [ + null, 0.142, 0.122, 0.125, 0.163, 0.163, 0.039, + 0.001, 0.051, 0.011, 0.0 , null, 0.0 + ], + "upper": [ + null, 0.222, 0.202, 0.205, 0.243, 0.243, 0.199, + 0.080, 0.131, 0.091, 0.0 , null, 0.0 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, + "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/VNP17_calibration_config.json b/mod17/data/VNP17_calibration_config.json new file mode 100644 index 0000000..427dce5 --- /dev/null +++ b/mod17/data/VNP17_calibration_config.json @@ -0,0 +1,38 @@ +{ + "description": "Calibration against MERRA-2 1980-2000 climatology and using priors with: Fixed froot_leaf_ratio, SLA based on carbon units", + "BPLUT": { + "GPP": "/home/arthur.endsley/DATA/20220503_MOD17_GPP_recal_BPLUT.csv", + "NPP": "/home/arthur.endsley/DATA/20220707_VNP17_GPP_recal_BPLUT.csv" + }, + "data": { + "file": "/anx_lagr4/MODIS_VIIRS/calibration/VIIRS_MOD16_MOD17_tower_site_drivers_v9.h5", + "sites_blacklisted": [ + "CN-Do1", "CN-Do2", "CN-Do3", "IT-Vig", "NL-Hor", "US-ORv", "US-WPT" + ] + }, + "optimization": { + "backend_template": "/home/arthur.endsley/20221226_VNP17_%s_calibration_PFT%d.nc4", + "prior": "/home/arthur.endsley/src/mod17/lib/python/mod17/data/VNP17_BPLUT_prior_20220706.json", + "chains": 3, + "draws": 220000, + "tune": "scaling", + "scaling": 1e-3, + "objective": "RMSE", + "bounds": { + "LUE_max": [0, 0.005], + "tmin0": [-35, 0], + "tmin1": [0, 25], + "vpd0": [0, 1000], + "vpd1": [1000, 8000], + "SLA": [1, 200], + "Q10": [0, 10], + "Q10_froot": [0, 10], + "Q10_livewood": [0, 10], + "froot_leaf_ratio": [0, 4], + "livewood_leaf_ratio": [0.01, 1.00], + "leaf_mr_base": [0, 0.1], + "froot_mr_base": [0, 0.1], + "livewood_mr_base": [0, 0.1] + } + } +} diff --git a/mod17/science.py b/mod17/science.py new file mode 100644 index 0000000..598c78f --- /dev/null +++ b/mod17/science.py @@ -0,0 +1,202 @@ +import warnings +import numpy as np +from typing import Sequence + +def climatology365(series: Sequence, dates: Sequence) -> Sequence: + ''' + Computes a 365-day climatology for different locations from a time series + of length T. Ignores leap days. The climatology could then be indexed + using ordinals generated by `ordinals365()`. + + Parameters + ---------- + series : numpy.ndarray + T x ... 
array of data + dates : list or tuple + Sequence of datetime.datetime or datetime.date instances + + Returns + ------- + numpy.ndarray + ''' + # Get first and last day of the year (DOY) + ordinal = np.array([ + # Finally, subtract 1 from each day in a leap year after Leap Day + (doy - 1) if ((dates[i].year % 4 == 0) and doy >= 60) else doy + for i, doy in enumerate([ + # Next, fill in 0 wherever Leap Day occurs + 0 if (dates[i].year % 4 == 0 and doy == 60) else doy + for i, doy in enumerate([ + # First, convert datetime.datetime to ordinal day-of-year (DOY) + int(dt.strftime('%j')) for dt in dates + ]) + ]) + ]) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + return np.array([ + np.nanmean(series[ordinal == day,...], axis = 0) + for day in range(1, 366) + ]) + + +def daynight_partition(arr_24hr, updown, reducer = 'mean', missing = (0, 0)): + ''' + Partitions a 24-hour time series array into daytime and nighttime values, + then calculates the mean in each group. Daytime is defined as when the sun + is above the horizon; nighttime is the complement. + + NOTE: If the sun never rises, the "daytime" average is computed over 24 + hours, anyway. In these cases, "nighttime" average is identical to the + "daytime" average (i.e., 24 hours of night). If the sun is always above + the horizon (i.e., sun never sets) on a given day, the "daytime" average + is computed as expected (over 24 hours) and the nighttime average is + missing and is instead filled with the second element of the `missing` + argument's sequence. + + Parameters + ---------- + arr_24hr : numpy.ndarray + A size (24 x ...) array; the first axis must have 24 elements + corresponding to the measurement in each hour + updown: numpy.ndarray + A size (2 x ...) array, compatible with arr_24hr, where the first axis + has the hour of sunrise and sunset, in that order, for each element + reducer : str + One of "mean" or "sum" indicating whether an average or a total of the + daytime/ nighttime values should be calculated; e.g., for "mean", the + hourly values from daytime hours are added up and divided by the + length of the day (in hours). + missing : tuple + Values to use when the sun is always below or above the horizon for 24 + hours (i.e., never rises or never sets); first value is ignored (may + be used to fill missing daylight hours in some future version), and + the second value is used to fill in missing nighttime hours + (Default: `(0, 0)`). + + Returns + ------- + numpy.ndarray + A size (2 x ...) array where the first axis enumerates the daytime and + nighttime mean values, respectively + ''' + assert reducer in ('mean', 'sum'),\ + 'Argument "reducer" must be one of: "mean", "sum"' + # Prepare single-valued output array + arr_daytime = np.zeros(arr_24hr.shape[1:]) + arr_nighttime = arr_daytime.copy() + daylight_hrs = arr_daytime.copy().astype(np.int16) + # Do sunrise and sunset define an interval? (Sunset > Sunrise)? + inside_interval = np.apply_along_axis(lambda x: x[1] > x[0], 0, updown) + # Or is the sun never up? + never_up = np.logical_and(updown[0,...] == -1, updown[1,...] == -1) + # Iteratively sum daytime VPD and temperature values + for hr in range(0, 24): + # Given only hour of sunrise/set on a 24-hour clock... + # if sun rises and sets on same day: SUNRISE <= HOUR <= SUNSET; + # if sun sets on next day: either SUNRISE <= HOUR or HOUR <= SUNSET; + sun_is_up = np.logical_or( # Either... + np.logical_and(inside_interval, # ...Rises and sets same day + np.logical_and(updown[0,...] 
<= hr, hr <= updown[1,...])), + np.logical_and(~inside_interval, # ...Sets on next day + np.logical_or(updown[0,...] <= hr, hr <= updown[1,...]))) + # For simplicity, compute a 24-hour mean even if the sun never rises; + # there's no way to know what the "correct" daytime value is + mask = np.logical_or(never_up, sun_is_up) + np.add(np.where( + mask, arr_24hr[hr,...], 0), arr_daytime, out = arr_daytime) + np.add(np.where( + ~mask, arr_24hr[hr,...], 0), arr_nighttime, out = arr_nighttime) + # Keep track of the denominator (hours) for calculating the mean; + # note that this over-estimates actual daylight hours by 1 hour + # but results in the correct denominator for the sums above + np.add(np.where(mask, 1, 0), daylight_hrs, out = daylight_hrs) + arr_24hr = None + # Calculate mean quantities + if reducer == 'mean': + arr_daytime = np.divide(arr_daytime, daylight_hrs) + arr_nighttime = np.divide(arr_nighttime, 24 - daylight_hrs) + # For sites where the sun is always above/ below the horizon, set missing + # nighttime values to zero + arr_nighttime[~np.isfinite(arr_nighttime)] = missing[1] + return np.stack((arr_daytime, arr_nighttime)) + + +def degree_lengths(phi, a = 6378137, b = 6356752.3142): + ''' + Returns the approximate length of degrees of latitude and longitude. + Source: + + https://en.wikipedia.org/wiki/Latitude + + phi : Number + Latitude, in degrees + a : Number + Radius of the Earth (major axis) in meters + b : Number + Length of minor axis of the Earth in meters + + Returns + ------- + tuple + Length of a degree of (longitude, latitude), respectively + ''' + e2 = ((a**2) - (b**2)) / (a**2) + # Approximate length of a degree of latitude + lat_m = 111132.954 - (559.822 * np.cos(2 * np.deg2rad(phi))) +\ + (1.175 * np.cos(4 * np.deg2rad(phi))) + lng_m = (np.pi * a * np.cos(np.deg2rad(phi))) / ( + 180 * np.sqrt(1 - (e2 * np.sin(np.deg2rad(phi))**2))) + return (lng_m, lat_m) + + +def nash_sutcliffe( + predicted: Sequence, observed: Sequence, norm: bool = False) -> float: + ''' + Computes the Nash-Sutcliffe efficiency of a model's predictions. + + $$ + \mathrm{NSE} = 1 - + \frac{\sum_i (y_i - \hat{y}_i)^2}{\sum_i (y_i - \bar{y}_i)^2} + $$ + + Parameters + ---------- + predicted : Sequence + Predicted values + observed : Sequence + Observed values + norm : bool + True to return the normalized Nash-Sutcliffe efficiency + (Default: False) + + Returns + ------- + float + ''' + nse = 1 - (np.nansum(np.power(observed - predicted, 2)) /\ + np.nansum(np.power(observed - np.nanmean(observed), 2))) + if norm: + return 1 / (2 - nse) # Return normalized NSE + return nse + + + +def ordinals365(dates: Sequence) -> Sequence: + ''' + Returns a length-T sequence of ordinals on [1,365]. Can be used for + indexing a 365-day climatology; see `climatology365()`. + + Parameters + ---------- + dates : list or tuple + Sequence of datetime.datetime or datetime.date instances + + Returns + ------- + list + ''' + return [ + t - 1 if (year % 4 == 0 and t >= 60) else t + for t, year in [(int(t.strftime('%j')), t.year) for t in dates] + ] diff --git a/mod17/sensitivity.py b/mod17/sensitivity.py new file mode 100644 index 0000000..6abbbf3 --- /dev/null +++ b/mod17/sensitivity.py @@ -0,0 +1,328 @@ +''' +Performs the Sobol' sensitivity analysis for the MOD17 GPP and NPP models. + +It's not clear that treating driver data as parameters is appropriate, here, +given that the NSE is calculated based on observed fluxes. 
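+
+Example command-line use (hypothetical paths; the `__main__` block below
+exposes `main()` via `fire`, so arguments map onto its signature):
+
+    python sensitivity.py gpp /path/to/MOD17_calibration_config.json --pft=1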
+''' + +import json +import os +import warnings +import numpy as np +import h5py +import mod17 +from tqdm import tqdm +from mod17 import MOD17 +from mod17.utils import restore_bplut, pft_dominant +from mod17.science import nash_sutcliffe +from SALib.sample import saltelli +from SALib.analyze import sobol + +OUTPUT_TPL = '/home/arthur.endsley/MOD17_sensitivity_%s_analysis.json' + + +def main(model, config_file, pft = None): + ''' + Conducts the Sobol' sensitivity analysis for the GPP, NPP models. + + Parameters + ---------- + model : str + pft : int + ''' + with open(config_file, 'r') as file: + config = json.load(file) + if model.lower() == 'gpp': + saltelli_gpp(config, pft) + elif model.lower() == 'npp': + saltelli_npp(config, pft) + elif model.lower() == 'npp2': + saltelli_npp_and_gpp(config, pft) + else: + raise ValueError('Did not recognize model "%s"' % model) + + +def saltelli_gpp(config, pft = None): + ''' + Sensitivity analysis for the GPP model. + + Parameters + ---------- + pft : int + ''' + drivers, gpp = load_gpp_data(config, None, pft, validation_mask_only = True) + params = MOD17.required_parameters[0:5] + problem = { + 'num_vars': len(params), + 'names': params, + 'bounds': [ + config['optimization']['bounds'][p] + for p in params + ] + } + # NOTE: Number of samples must be a power of 2 + param_sweep = saltelli.sample(problem, 256) + Y = np.zeros([param_sweep.shape[0]]) + for i, X in enumerate(tqdm(param_sweep)): + yhat = MOD17._gpp(X, *drivers[0:4]) + Y[i] = nash_sutcliffe(yhat, gpp, norm = True) + metrics = sobol.analyze(problem, Y) + name = 'GPP' if pft is None else 'GPP-PFT%d' % pft + with open(OUTPUT_TPL % name, 'w') as file: + json.dump(dict([(k, v.tolist()) for k, v in metrics.items()]), file) + + +def saltelli_npp(config, pft = None): + ''' + Sensitivity analysis for the NPP model. 
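+
+    The (five) GPP parameters are held fixed at their across-PFT BPLUT means
+    while only the NPP parameters are varied; for PFTs 10 and 12, the
+    livewood parameters are excluded and zero-filled in the output.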
+ + Parameters + ---------- + pft : int + ''' + # Now for NPP; we have to load the BPLUT with the static parameters + bplut = restore_bplut(config['BPLUT']['NPP']) + gpp_params = [ + np.nanmean(bplut[p]) for p in MOD17.required_parameters[0:5] + ] + drivers, npp = load_npp_data(config, None, pft) + params = MOD17.required_parameters[5:] + if pft in (10, 12): + params = list(set(params).difference( + ('Q10_livewood', 'livewood_leaf_ratio', 'livewood_mr_base'))) + problem = { + 'num_vars': len(params), + 'names': params, + 'bounds': [ + config['optimization']['bounds'][p] for p in params + ] + } + # NOTE: Number of samples must be a power of 2 + samples = 4096 if pft == 3 else 1024 + samples = 2048 if pft in (10, 12) else samples + param_sweep = saltelli.sample(problem, samples) + Y = np.zeros([param_sweep.shape[0]]) + # Index the livewood parameters + idx = np.array([ + MOD17.required_parameters[5:].index(p) + for p in ('Q10_livewood', 'livewood_leaf_ratio', 'livewood_mr_base') + ]) + # There is no generalizable way to index and insert the livewood + # parameters because one of them comes at the end of the array and + # np.insert() will not work in that case; so, let's assert we have the + # right indices and make this part hard-coded + assert (idx == [1,4,7]).all() + for i, X in enumerate(tqdm(param_sweep)): + if pft in (10, 12): + # Even more confusingly, have to increment each successive index + # because of the progressive insertion + X0 = X.copy() + for j in idx: + X = np.insert(X, j, 0) + yhat = MOD17._npp(np.array([*gpp_params, *X]), *drivers) + Y[i] = nash_sutcliffe(yhat, npp, norm = True) + metrics = sobol.analyze(problem, Y) + name = 'NPP' if pft is None else 'NPP-PFT%d' % pft + with open(OUTPUT_TPL % name, 'w') as file: + if pft in (10, 12): + output = dict() + for key, value in metrics.items(): + for j in idx: + value = np.insert(value, j, 0) + output[key] = value.tolist() + else: + output = dict([(k, v.tolist()) for k, v in metrics.items()]) + json.dump(output, file) + + +def saltelli_npp_and_gpp(config, pft = None): + ''' + Sensitivity analysis for the NPP model, including the GPP parameters + in the analysis. 
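+
+    Unlike `saltelli_npp()`, all of the GPP and NPP parameters are varied
+    simultaneously; for PFTs 10 and 12, the livewood parameters are again
+    excluded and zero-filled in the output.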
+ + Parameters + ---------- + pft : int + ''' + # Now for NPP; we have to load the BPLUT with the static parameters + bplut = restore_bplut(config['BPLUT']['NPP']) + print('Loading data...') + drivers, npp = load_npp_data(config, None, pft) + print('Setting up experiments...') + params = MOD17.required_parameters + if pft in (10, 12): + params = list(set(params).difference( + ('Q10_livewood', 'livewood_leaf_ratio', 'livewood_mr_base'))) + problem = { + 'num_vars': len(params), + 'names': params, + 'bounds': [ + config['optimization']['bounds'][p] for p in params + ] + } + # NOTE: Number of samples must be a power of 2 + samples = 2048 + if pft is not None: + samples = 4096 if pft == 3 else 1024 + samples = 2048 if pft in (10, 12) else samples + param_sweep = saltelli.sample(problem, samples) + Y = np.zeros([param_sweep.shape[0]]) + # Index the livewood parameters + idx = np.array([ + MOD17.required_parameters[5:].index(p) + for p in ('Q10_livewood', 'livewood_leaf_ratio', 'livewood_mr_base') + ]) + # There is no generalizable way to index and insert the livewood + # parameters because one of them comes at the end of the array and + # np.insert() will not work in that case; so, let's assert we have the + # right indices and make this part hard-coded + assert (idx == [1,4,7]).all() + for i, X in enumerate(tqdm(param_sweep)): + if pft in (10, 12): + # Even more confusingly, have to increment each successive index + # because of the progressive insertion + X0 = X.copy() + for j in idx: + X = np.insert(X, j, 0) + yhat = MOD17._npp(X, *drivers) + Y[i] = nash_sutcliffe(yhat, npp, norm = True) + metrics = sobol.analyze(problem, Y) + name = 'NPP' if pft is None else 'NPP-PFT%d' % pft + with open(OUTPUT_TPL % name, 'w') as file: + if pft in (10, 12): + output = dict() + for key, value in metrics.items(): + for j in idx: + value = np.insert(value, j, 0) + output[key] = value.tolist() + else: + output = dict([(k, v.tolist()) for k, v in metrics.items()]) + json.dump(output, file) + + +def load_gpp_data( + config, filename = None, pft = None, subgrid = False, verbose = True, + validation_mask_only = False): + ''' + Loads the data required for running a sensitivity analysis on the + MOD17 GPP model. 
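+
+    Driver datasets are returned in the order expected by `MOD17._gpp()`:
+    fPAR, Tmin, VPD, PAR.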
+ + Parameters + ---------- + config : dict + filename : str or None + pft : int + subgrid : bool + verbose : bool + validation_mask_only : bool + + Returns + ------- + tuple + A 3-element tuple: A sequence of driver datasets (first element) and + the array of valid GPP observations (second element), and the mask + ''' + if verbose: + print('Loading GPP datasets...') + if filename is None: + filename = config['data']['file'] + with h5py.File(filename, 'r') as hdf: + sites = hdf['FLUXNET/site_id'][:].tolist() + if hasattr(sites[0], 'decode'): + sites = [s.decode('utf-8') for s in sites] + # NOTE: Converting from Kelvin to Celsius + tday = hdf['MERRA2/T10M_daytime'][:] - 273.15 + qv10m = hdf['MERRA2/QV10M_daytime'][:] + ps = hdf['MERRA2/PS_daytime'][:] + drivers = [ # fPAR, Tmin, VPD, PAR, LAI, Tmean, years + hdf['MODIS/MOD15A2HGF_fPAR_interp'][:], + hdf['MERRA2/Tmin'][:] - 273.15, + MOD17.vpd(qv10m, ps, tday), + MOD17.par(hdf['MERRA2/SWGDN'][:]), + ] + observed_gpp = hdf['FLUXNET/GPP'][:] + is_test = hdf['FLUXNET/validation_mask'][:].sum(axis = 0).astype(bool) + if pft is not None: + blacklist = config['data']['sites_blacklisted'] + pft_map = pft_dominant(hdf['state/PFT'][:], site_list = sites) + pft_mask = np.logical_and(pft_map == pft, ~np.in1d(sites, blacklist)) + drivers = [d[:,pft_mask] for d in drivers] + observed_gpp = observed_gpp[:,pft_mask] + # Set negative VPD to zero + drivers[2] = np.where(drivers[2] < 0, 0, drivers[2]) + # Convert fPAR from (%) to [0,1] + if subgrid: + drivers[0] = drivers[0] * 0.01 + else: + # Average over the subgrid + drivers[0] = np.nanmean(drivers[0], axis = -1) * 0.01 + # Speed things up by focusing only on data points where valid data exist + mask = ~np.isnan(observed_gpp) + if validation_mask_only: + # Stratify the data using the validation mask so that an equal number + # of samples from each PFT are used + mask = np.logical_and(is_test, mask) + drivers = [d[mask] for d in drivers] + return (drivers, observed_gpp[mask], mask) + + +def load_npp_data( + config, filename = None, pft = None, subgrid = False, verbose = True): + ''' + Loads the data required for running a sensitivity analysis on the + MOD17 NPP model. 
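+
+    Driver datasets are returned in the order expected by `MOD17._npp()`:
+    fPAR, Tmin, VPD, PAR, LAI, Tmean, years.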
+ + Parameters + ---------- + config : dict + filename : str + pft : int + subgrid : bool + verbose : bool + + Returns + ------- + tuple + A 2-element tuple: A sequence of driver datasets (first element) and + the array of valid NPP observations (second element) + ''' + if verbose: + print('Loading NPP datasets...') + if filename is None: + filename = config['data']['file'] + with h5py.File(filename, 'r') as hdf: + sites = hdf['NPP/site_id'][:].tolist() + if hasattr(sites[0], 'decode'): + sites = [s.decode('utf-8') for s in sites] + # NOTE: Converting from Kelvin to Celsius + tday = hdf['NPP/surface_met_MERRA2/T10M_daytime'][:] - 273.15 + qv10m = hdf['NPP/surface_met_MERRA2/QV10M_daytime'][:] + ps = hdf['NPP/surface_met_MERRA2/PS_daytime'][:] + drivers = [ # fPAR, Tmin, VPD, PAR, LAI, Tmean, years + hdf['NPP/MOD15A2H_fPAR_clim_filt'][:], + hdf['NPP/surface_met_MERRA2/Tmin'][:] - 273.15, + MOD17.vpd(qv10m, ps, tday), + MOD17.par(hdf['NPP/surface_met_MERRA2/SWGDN'][:]), + hdf['NPP/MOD15A2H_LAI_clim_filt'][:], + hdf['NPP/surface_met_MERRA2/T10M'][:] - 273.15, + np.full(ps.shape, 1) # i.e., A 365-day climatological year ("Year 1") + ] + observed_npp = hdf['NPP/NPP_total'][:] + if pft is not None: + blacklist = config['data']['sites_blacklisted'] + pft_map = hdf['NPP/PFT'][:] + drivers = [d[:,pft_map == pft] for d in drivers] + observed_npp = observed_npp[pft_map == pft] + # Set negative VPD to zero + drivers[2] = np.where(drivers[2] < 0, 0, drivers[2]) + # Convert fPAR from (%) to [0,1] and re-scale LAI; reshape fPAR and LAI + drivers[0] = np.nanmean(drivers[0], axis = -1) * 0.01 + drivers[4] = np.nanmean(drivers[4], axis = -1) * 0.1 + return (drivers, observed_npp) + + +if __name__ == '__main__': + import fire + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + fire.Fire(main) diff --git a/mod17/srs.py b/mod17/srs.py new file mode 100644 index 0000000..7285dcb --- /dev/null +++ b/mod17/srs.py @@ -0,0 +1,215 @@ +''' +MODIS sinusoidal projection forward and backward coordinate transformations, +courtesy of Giglio et al. (2018), Collection 6 MODIS Burned Area Product +User's Guide, Version 1, Appendix B: + + https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.2.pdf +''' + +import h5py +import re +import numpy as np +from numbers import Number +from typing import Sequence, Tuple, Iterable + +SPHERE_RADIUS = 6371007.181 # Radius of ideal sphere, meters +TILE_LINES = {250: 5000, 500: 2400, 1000: 1200} # Num. lines by nominal res. +TILE_SIZE = 1111950 # Width and height of MODIS tile in projection plane +XMIN = -20015109 # Western limit of projection plane, meters +YMAX = 10007555 # Nothern limit of projection plane, meters +VIIRS_METADATA = re.compile( + r'.*XDim=(?P\d+)' + r'.*YDim=(?P\d+)' + r'.*UpperLeftPointMtrs=\((?P