From ef626d69b7ff72f042d46ee37a7de0cfe229a358 Mon Sep 17 00:00:00 2001 From: "K. Arthur Endsley" Date: Mon, 27 Feb 2023 12:54:00 -0700 Subject: [PATCH] Initial commit --- LICENSE | 21 + README.md | 30 + REQUIREMENTS | 8 + data/MOD17_BPLUT_C5.1_MERRA_NASA.txt | 21 + mod17/__init__.py | 503 ++++++++ mod17/calibration.py | 1265 ++++++++++++++++++++ mod17/data/MOD17_BPLUT_C5.1_MERRA_NASA.csv | 13 + mod17/data/MOD17_BPLUT_CX.X_MERRA_NASA.csv | 14 + mod17/data/MOD17_BPLUT_prior_20220525.json | 82 ++ mod17/data/MOD17_BPLUT_prior_20220611.json | 82 ++ mod17/data/MOD17_BPLUT_prior_20220621.json | 83 ++ mod17/data/MOD17_BPLUT_prior_20221027.json | 101 ++ mod17/data/MOD17_BPLUT_prior_20221223.json | 101 ++ mod17/data/MOD17_calibration_config.json | 38 + mod17/data/VNP17_BPLUT_prior_20220706.json | 121 ++ mod17/data/VNP17_calibration_config.json | 38 + mod17/science.py | 202 ++++ mod17/sensitivity.py | 328 +++++ mod17/srs.py | 215 ++++ mod17/utils.py | 542 +++++++++ mod17/viirs.py | 468 ++++++++ setup.cfg | 33 + setup.py | 3 + tests/tests.py | 359 ++++++ 24 files changed, 4671 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 REQUIREMENTS create mode 100644 data/MOD17_BPLUT_C5.1_MERRA_NASA.txt create mode 100644 mod17/__init__.py create mode 100644 mod17/calibration.py create mode 100644 mod17/data/MOD17_BPLUT_C5.1_MERRA_NASA.csv create mode 100644 mod17/data/MOD17_BPLUT_CX.X_MERRA_NASA.csv create mode 100644 mod17/data/MOD17_BPLUT_prior_20220525.json create mode 100644 mod17/data/MOD17_BPLUT_prior_20220611.json create mode 100644 mod17/data/MOD17_BPLUT_prior_20220621.json create mode 100644 mod17/data/MOD17_BPLUT_prior_20221027.json create mode 100644 mod17/data/MOD17_BPLUT_prior_20221223.json create mode 100644 mod17/data/MOD17_calibration_config.json create mode 100644 mod17/data/VNP17_BPLUT_prior_20220706.json create mode 100644 mod17/data/VNP17_calibration_config.json create mode 100644 mod17/science.py create mode 100644 mod17/sensitivity.py create mode 100644 mod17/srs.py create mode 100644 mod17/utils.py create mode 100644 mod17/viirs.py create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tests/tests.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fef73a2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 K. Arthur Endsley + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..be759f1 --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +MODIS MOD17 Terrestrial Productivity Algorithm +============================================== + +The MODIS MOD17 algorithm provided the first global, continuous, weekly estimates of ecosystem gross primary productivity (GPP) and annual estimates of net primary productivity (NPP). This source code can be used for comprehensive calibration, validation, sensitivity, and uncertainty analysis of the MOD17 algorithm. It was used by Endsley et al. (In Review) for the final recalibration of MODIS MOD17 and the development of a new, VIIRS-based VNP17 global productivity algorithm. + + +Installation +------------ + +Within the `MOD17` repository's root directory: + +```sh +pip install . +``` + +Tests can be run with: + +```sh +python tests/tests.py +``` + + +Citation +-------- + +**If using this software, please cite the following paper:** + +``` +Endsley, K.A., M. Zhao, J.S. Kimball, S. Devadiga. In Review. Continuity of global MODIS terrestrial primary productivity estimates in the VIIRS era using model-data fusion. Journal of Geophysical Research: Biogeosciences. +``` diff --git a/REQUIREMENTS b/REQUIREMENTS new file mode 100644 index 0000000..09eb527 --- /dev/null +++ b/REQUIREMENTS @@ -0,0 +1,8 @@ +h5py==3.4.0 +netCDF4>=1.5.7 +numpy<=1.21.5 +scipy>=1.7.0 +xarray>=0.19.0 +suntransit>=0.1.0 +tqdm>=4.60.0 +matplotlib>=3.0.0 diff --git a/data/MOD17_BPLUT_C5.1_MERRA_NASA.txt b/data/MOD17_BPLUT_C5.1_MERRA_NASA.txt new file mode 100644 index 0000000..8b9c58a --- /dev/null +++ b/data/MOD17_BPLUT_C5.1_MERRA_NASA.txt @@ -0,0 +1,21 @@ +UMD_VEG_LC ENF=0 EBF=1 DNF=2 DBF=3 MF=4 CShrub=5 OShrub=6 WSavannas=7 Savannas=8 Grass=9 Crop=10 +LUEmax(KgC/m^2/d/MJ) 0.001211 0.001405 0.001227 0.001526 0.001226 0.001495 0.001027 0.001498 0.001454 0.001215 0.001300 +Tmin_min(C) -8.00 -8.00 -8.00 -6.00 -7.00 -8.00 -8.00 -8.00 -8.00 -8.00 -8.00 +Tmin_max(C) 8.31 9.09 10.44 9.94 9.50 8.61 8.80 11.39 11.39 12.02 12.02 +VPD_min(Pa) 650.0 1000.0 650.0 650.0 650.0 650.0 650.0 650.0 650.0 650.0 650.0 +VPD_max(Pa) 3000.0 4000.0 3500.0 2900.0 2900.0 4300.0 4400.0 3500.0 3600.0 4200.0 4500.0 +SLA(LAI/KgC) 15.0 26.9 16.9 24.7 22.6 9.4 12.0 28.8 28.9 38.0 38.0 +Q10(unitless) 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 +froot_leaf_ratio 1.2 1.1 1.7 1.1 1.1 1.0 1.3 1.8 1.8 2.6 2.0 +livewood_leaf_ratio 0.182 0.162 0.165 0.203 0.203 0.079 0.040 0.091 0.051 0.000 0.000 +leaf_mr_base(Kgc/KgC/d) 0.00604 0.00604 0.00815 0.00778 0.00778 0.00869 0.00519 0.00869 0.00869 0.0098 0.0098 +froot_mr_base(Kgc/KgC/d) 0.00519 0.00519 0.00519 0.00519 0.00519 0.00519 0.00519 0.00519 0.00519 0.00819 0.00819 +livewood_mr_base(Kgc/KgC/d) 0.00397 0.00397 0.00397 0.00371 0.00371 0.00436 0.00218 0.00312 0.00100 0.00000 0.00000 + +*** Recalibrated by Maosheng Zhao before March 31st, 2009. Version is the improved Collection5.1 *** +*** Note not add extra spaces between variable names and parentheses, otherwise it will be messed up when used by MOD17 program *** +Below is the mean GPP and NPP from 2000 to 2006 estimated from 0.25 degree tuned BPLUT system of the C5.1, which will be used to +tune new version of the global MOD17. 
The tuning tools are MOD17_improved_main_code_GEO.c and mean_GPP_NPP_tune_BPLUT.c
+UMD_VEG_LC ENF=0 EBF=1 DNF=2 DBF=3 MF=4 CShrub=5 OShrub=6 WSavannas=7 Savannas=8 Grass=9 Crop=10
+GPP(gC/m^2/yr) 877.10 2702.62 727.21 1339.93 1133.22 812.04 308.83 1368.90 1208.90 393.25 884.11
+NPP(gC/m^2/yr) 478.49 1175.45 343.55 622.79 604.82 381.50 160.14 673.06 594.73 205.86 486.46
diff --git a/mod17/__init__.py b/mod17/__init__.py new file mode 100644 index 0000000..3abe2b9 --- /dev/null +++ b/mod17/__init__.py @@ -0,0 +1,503 @@
+'''
+The MOD17 Daily GPP and Annual NPP algorithm.
+
+Note that there are two hidden methods of the MOD17 class:
+
+- `MOD17._gpp()`
+- `MOD17._npp()`
+
+These are streamlined implementations of `MOD17.daily_gpp()` and
+`MOD17.annual_npp()`, respectively, and were designed for use in calibration,
+where repeated function calls can make function overhead a real issue. These
+streamlined functions expect a vectorized parameters array as the first
+argument and subsequent arguments are driver datasets, e.g.:
+
+    MOD17._gpp(params, fpar, tmin, vpd, par)
+    MOD17._npp(params, fpar, tmin, vpd, par, lai, tmean, years)
+'''
+
+import warnings
+import numpy as np
+from typing import Callable, Sequence, Tuple, Union, Iterable
+from numbers import Number
+
+PFT_VALID = (1,2,3,4,5,6,7,8,9,10,12)
+
+class MOD17(object):
+    '''
+    The MODIS MxD17 Gross Primary Productivity and Net Photosynthesis model.
+    The required model parameters are:
+
+    - `LUE_max`: Maximum light-use efficiency (kg C MJ-1)
+    - `tmin0` and `tmin1`: The lower and upper bounds on the temperature
+      response of photosynthesis (deg C); i.e., temperature at which stomata
+      are fully closed and fully open, respectively
+    - `vpd0` and `vpd1`: The lower and upper bounds on the response to VPD
+      of photosynthesis (Pa); i.e., VPD at which stomata are fully open and
+      fully closed, respectively
+    - `SLA`: Specific leaf area, or projected leaf area per kilogram of C
+      [LAI kg C-1]
+    - `Q10_froot` and `Q10_livewood`: Exponent shape parameters controlling
+      respiration as a function of temperature (in degrees C) (unitless)
+    - `froot_leaf_ratio`: The ratio of fine root C to leaf C (unitless)
+    - `livewood_leaf_ratio`: Ratio of live wood carbon to annual maximum leaf
+      carbon
+    - `leaf_mr_base`: Maintenance respiration per unit leaf carbon per day at
+      a reference temperature of 20 degrees C [kg C (kg C)-1 day-1]
+    - `froot_mr_base`: Maintenance respiration per unit fine root carbon per
+      day at a reference temperature of 20 degrees C [kg C (kg C)-1 day-1]
+    - `livewood_mr_base`: Maintenance respiration per unit live wood carbon
+      per day at a reference temperature of 20 degrees C [kg C (kg C)-1 d-1]
+
+    NOTE: This class includes the private class methods `MOD17._gpp()` and
+    `MOD17._npp()`, which avoid the overhead associated with creating a model
+    instance; they should be used, e.g., for model calibration because they
+    are faster and produce the same results as `MOD17.daily_gpp()` and
+    `MOD17.annual_npp()`.
+
+    NOTE: For multiple PFTs, a vectorized parameters array can be passed;
+    i.e., a dictionary where each value is an (N,) array for N sites.
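+
+    A minimal, illustrative example of direct use (the parameter values below
+    are hypothetical placeholders, not calibrated BPLUT values):
+
+        model = MOD17({
+            'LUE_max': 0.0013, 'tmin0': -8.0, 'tmin1': 9.0,
+            'vpd0': 650.0, 'vpd1': 4000.0, 'SLA': 25.0,
+            'Q10_livewood': 2.0, 'Q10_froot': 2.0,
+            'froot_leaf_ratio': 1.2, 'livewood_leaf_ratio': 0.2,
+            'leaf_mr_base': 0.006, 'froot_mr_base': 0.005,
+            'livewood_mr_base': 0.004
+        })
+        # Daily GPP for one site-day; roughly 4 g C m-2 day-1 with these inputs
+        gpp = model.daily_gpp(fpar = 0.6, tmin = 5.0, vpd = 1200.0, par = 8.0)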
+ + Parameters + ---------- + params : dict + Dictionary of model parameters + ''' + required_parameters = [ + 'LUE_max', 'tmin0', 'tmin1', 'vpd0', 'vpd1', 'SLA', + 'Q10_livewood', 'Q10_froot', 'froot_leaf_ratio', + 'livewood_leaf_ratio', 'leaf_mr_base', 'froot_mr_base', + 'livewood_mr_base' + ] + + def __init__(self, params: dict): + self.params = params + for key in self.required_parameters: + setattr(self, key, params[key]) + + @staticmethod + def _gpp(params, fpar, tmin, vpd, par): + 'Daily GPP as static method, avoids overhead of class instantiation' + # "params" argument should be a Sequence of atomic parameter values + # in the order prescribed by "required_parameters" + tmin_scalar = linear_constraint(params[1], params[2])(tmin) + vpd_scalar = linear_constraint( + params[3], params[4], form = 'reversed')(vpd) + lue = params[0] * tmin_scalar * vpd_scalar + return 1e3 * lue * fpar * par + + @staticmethod + def _npp(params, fpar, tmin, vpd, par, lai, tmean, years): + ''' + Annual NPP as static method, avoids overhead of class instantiation. + NOTE: It's assumed that the elements of `years` are in chronological + order on the first axis (time axis). + ''' + # "params" argument should be a Sequence of atomic parameter values + # in the order prescribed by "required_parameters" + gpp = MOD17._gpp(params, fpar, tmin, vpd, par) + # Daily respiration + leaf_mass = lai / params[5] # LAI divided by SLA -> leaf mass [kg m-2] + froot_mass = leaf_mass * params[8] # Leaf mass times `froot_leaf_ratio` + # NOTE: Q10 calculated differently depending on the component + _exp = (tmean - 20) / 10 + q10_leaf = np.power(3.22 - 0.046 * tmean, _exp) + q10_froot = np.power(params[7], _exp) + # Convert leaf, froot mass from [kg C m-2] to [g C m-2], then... + r_leaf = 1e3 * leaf_mass * params[10] * q10_leaf + r_froot = 1e3 * froot_mass * params[11] * q10_froot + # Accumulate respiration over each year + all_years = np.unique(years).tolist() + # Pre-allocate arrays + mr_leaf = np.full((len(all_years), *lai.shape[1:],), np.nan) + mr_froot = np.full((len(all_years), *lai.shape[1:],), np.nan) + mr_livewood = np.full((len(all_years), *lai.shape[1:],), np.nan) + diff = np.full((len(all_years), *lai.shape[1:],), np.nan) + for i, each_year in enumerate(all_years): + # Sum respiration for each tissue in each year + mr_leaf[i] = np.nansum( + np.where(years == each_year, r_leaf, 0), axis = 0) + mr_froot[i] = np.nansum( + np.where(years == each_year, r_froot, 0), axis = 0) + livewood_mass = (np.nanmax( + np.where(years == each_year, lai, np.nan), axis = 0 + ) / params[5] + ) * params[9] + # For consistency with other respiration components, livewood + # respiration should be zero, not NaN, when no respiration + mrl = 1e3 * livewood_mass * params[12] *\ + np.power(params[6], (tmean - 20) / 10).sum(axis = 0) + mr_livewood[i] = np.where(np.isnan(mrl), 0, mrl) + # Total plant maintenance respiration + r_m = mr_leaf[i] + mr_froot[i] + mr_livewood[i] + # GPP - R_M + diff[i] = np.where(years == each_year, gpp, 0).sum(axis = 0) - r_m + # Annual growth respiration is assumed to be 20% of (GPP - R_M); see + # Figure 1 of MOD17 User Guide + return np.where(diff < 0, 0, 0.8 * diff) + + @staticmethod + def par(sw_rad: Number, period_hrs: Number = 1) -> Number: + ''' + Calculates daily total photosynthetically active radiation (PAR) from + (hourly) incoming short-wave radiation (SW_rad). PAR is assumed to + be 45% of SW_rad. 
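+
+        For example (with an illustrative, not observed, value): a mean
+        short-wave flux of 300 W m-2 supplied at an hourly time step
+        (`period_hrs = 1`) yields 0.45 * (0.0036 * 24 * 300), or roughly
+        11.7 MJ m-2 day-1 of PAR.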
+ + Parameters + ---------- + swrad : int or float or numpy.ndarray + Incoming short-wave radiation (W m-2) + period_hrs : int + Period over which radiation is measured, in hours (Default: 1) + + Returns + ------- + int or float or numpy.ndarray + ''' + # Convert SW_rad from [W m-2] to [MJ m-2], then take 45%; + # 3600 secs hr-1 times (1 MJ / 1e6 Joules) == 0.0036 + return 0.45 * (0.0036 * (24 / period_hrs) * sw_rad) + + @staticmethod + def vpd(qv10m: Number, pressure: Number, tmean: Number) -> Number: + ''' + Computes vapor pressure deficit (VPD) from surface meteorology. + + Parameters + ---------- + qv10m : int or float or numpy.ndarray + Water vapor mixing ratio at 10-meter height (Pa) + pressure : int or float or numpy.ndarray + Atmospheric pressure (Pa) + tmean : int or float or numpy.ndarray + Mean daytime temperature (degrees C) + + Returns + ------- + int or float or numpy.ndarray + ''' + # Actual vapor pressure (Gates 1980, Biophysical Ecology, p.311) + avp = (qv10m * pressure) / (0.622 + (0.379 * qv10m)) + # Saturation vapor pressure (similar to FAO formula) + svp = 610.7 * np.exp((17.38 * tmean) / (239 + tmean)) + return svp - avp + + def annual_npp( + self, fpar: Sequence, tmin: Sequence, vpd: Sequence, + par: Sequence, lai: Sequence, tmean: Sequence, years: Sequence + ) -> np.ndarray: + ''' + Annual net primary productivity (NPP). + + Parameters + ---------- + fpar : Sequence + Fraction of PAR intercepted by the canopy [0, 1], a (T x ...) + array for T number of days + tmin : Sequence + Daily minimum temperature (degrees C), a (T x ...) array for T + number of days + vpd : Sequence + Daytime vapor pressure deficit (Pa), a (T x ...) array for T + number of days + par : Sequence + Daily photosynthetically active radation (MJ m-2 day-1) + lai : Sequence + Leaf area index, daily, a (T x ...) array for T number of days + tmean : Sequence + Mean daily temperature (degrees C), a (T x ...) array for T number + of days + years : Sequence + Sequence of integers indicating the year of each daily + measurement, in order (e.g., [2015, 2015, ..., 2017]); a (T x ...) + array for T number of days + + Returns + ------- + numpy.ndarray + Total annual NPP [g C m-2 year-1] + ''' + r_leaf, r_froot, r_livewood = self.annual_respiration( + lai, tmean, years) + r_m = r_leaf + r_froot + r_livewood + gpp = self.daily_gpp(fpar, tmin, vpd, par) + diff = np.empty(r_m.shape) + all_years = np.unique(years).tolist() + for i, each_year in enumerate(all_years): + # GPP - R_M + diff[i] = np.where(years == each_year, gpp, 0).sum(axis = 0) - r_m[i] + return np.where(diff < 0, 0, 0.8 * diff) + + def annual_respiration( + self, lai: Sequence, tmean: Sequence, years: Sequence + ) -> Iterable[Tuple[Sequence, Sequence, Sequence]]: + ''' + Annual total maintenance respiration. Input datasets should have daily + denominations and extend over one or more years. + + Parameters + ---------- + lai : Sequence + Leaf area index, daily, a (T x ...) array for T number of days + tmean : Sequence + Mean daily temperature (degrees C), a (T x ...) array for T number + of days + years : Sequence + Sequence of integers indicating the year of each daily + measurement, in order (e.g., [2015, 2015, ..., 2017]); a (T x ...) 
+ array for T number of days + + Returns + ------- + tuple + A 3-tuple of total annual (leaf, fine root, livewood) respiration + with units of [g C m-2 year-1] + ''' + assert lai.shape == years.shape,\ + 'LAI array should conform with "years" array' + assert tmean.shape == years.shape,\ + 'Mean temperature array should conform with "years" array' + r_leaf_daily, r_froot_daily = self.daily_respiration(lai, tmean) + r_leaf, r_froot, r_livewood = [], [], [] + all_years = np.unique(years).tolist() + all_years.sort() + for i, each_year in enumerate(all_years): + r_leaf.append( + np.nansum(np.where(years == each_year, r_leaf_daily, 0), + axis = 0)) + r_froot.append( + np.nansum(np.where(years == each_year, r_froot_daily, 0), + axis = 0)) + # Annual maximum leaf mass (kg C) converted to livewood mass + # by allometric relation (livewood_leaf_ratio) + livewood_mass = (np.nanmax( + np.where(years == each_year, lai, np.nan), axis = 0 + ) / self.SLA) * self.livewood_leaf_ratio + # Livewood maintenance respiration (g C day-1), converted from + # (kg C day-1), as the product of livewood_mass, base + # respiration rate (livewood_mr_base), and annual sum of the + # maint. respiration term (Q10), see Equation 1.10, User's Guide + # NOTE: "livewood_mr_base" is denominated in days + # (kg C kg C-1 day-1) but that's okay because we took the annual + # sum of the Q10 respiration, essentially multipling by ~365 + rl = 1e3 * livewood_mass * self.livewood_mr_base *\ + np.power(self.Q10_livewood, (tmean - 20) / 10).sum(axis = 0) + # For consistency with other respiration components, livewood + # respiration should be zero, not NaN, when no respiration + r_livewood.append(np.where(np.isnan(rl), 0, rl)) + return (np.stack(r_leaf), np.stack(r_froot), np.vstack(r_livewood)) + + def daily_gpp( + self, fpar: Number, tmin: Number, vpd: Number, + par: Number) -> Number: + r''' + Daily gross primary productivity (GPP). + + $$ + \mathrm{GPP} = \varepsilon\times f(T_{min})\times f(V)\times + [\mathrm{PAR}]\times [\mathrm{fPAR}] + $$ + + Where $T_{min}$ is the minimum daily temperature, $V$ is the daytime + vapor pressure deficit (VPD), PAR is daily photosynthetically active + radiation, fPAR is the fraction of PAR absorbed by the canopy, and + $\varepsilon$ is the intrinsic (or maximum) light-use efficiency. + + Parameters + ---------- + fpar : int or float or numpy.ndarray + Fraction of PAR intercepted by the canopy [0, 1] + tmin : int or float or numpy.ndarray + Daily minimum temperature (degrees C) + vpd : int or float or numpy.ndarray + Daytime vapor pressure deficit (Pa) + par : int or float or numpy.ndarray + Daily photosynthetically active radation (MJ m-2 day-1) + + Returns + ------- + int or float or numpy.ndarray + Daily GPP flux in [g C m-2 day-1] + ''' + return 1e3 * self.lue(tmin, vpd) * fpar * par + + def daily_respiration( + self, lai: Number, tmean: Number + ) -> Iterable[Tuple[Number, Number]]: + r''' + Daily maintenance respiration for leaves and fine roots. + + Maintenance respiration ($r_m$) for leaves or fine roots is given: + + $$ + $r_m$ = m \times r_0 \times q^{\frac{T - 20}{10}} + $$ + + Where $m$ is either the leaf mass or fine root mass; $r_0$ is the rate + of maintenance respiration per unit leaf carbon (per day, at 20 + degrees C); and $q$ is the Q10 factor. + + NOTE: For fine roots and live wood, Q10 is a constant value of 2.0. + For leaves, the temperature-acclimated Q10 equation of Tjoelker et al. 
+ (2001, Global Change Biology) is used: + + $$ + Q_{10} = 3.22 - 0.046 * T_{avg} + $$ + + The "net photosynthesis" quantity in MOD17, even though it is a bit + misleading (it does not account for growth respiration and livewood + $r_m$) can then be calculated as GPP less the maintenance respiration + of leaves and fine roots: + + $$ + P_{net} = [\mathrm{GPP}] - r_{leaf} - r_{root} + $$ + + Parameters + ---------- + lai : float or numpy.ndarray + Leaf area index, daily + tmean : float or numpy.ndarray + Mean daily temperature (degrees C) + + Returns + ------- + tuple + 2-element tuple of (leaf respiration, fine root respiration) in + units of [g C m-2 day-1] + ''' + # Leaf mass, fine root mass (Eq 1.4, 1.5 in MOD17 User Guide) + leaf_mass = lai / self.SLA + froot_mass = leaf_mass * self.froot_leaf_ratio + # NOTE: Q10 calculated differently depending on the component + _exp = (tmean - 20) / 10 # Equations 1.6 and 1.7 + q10_leaf = np.power(3.22 - 0.046 * tmean, _exp) # Equation 1.11 + q10_froot = np.power(self.Q10_froot, _exp) + # NOTE: Converting from [kg C] to [g C] via *1e3 + r_leaf = 1e3 * leaf_mass * self.leaf_mr_base * q10_leaf + r_froot = 1e3 * froot_mass * self.froot_mr_base * q10_froot + return (r_leaf, r_froot) + + def daily_net_photosynthesis( + self, fpar: Number, tmin: Number, vpd: Number, par: Number, + lai: Number, tmean: Number) -> Number: + ''' + Daily net photosynthesis ("PSNet"). See: + + - `MOD17.daily_gpp()` + - `MOD17.daily_respiration()` + + Parameters + ---------- + fpar : int or float or numpy.ndarray + Fraction of PAR intercepted by the canopy [0, 1] + tmin : int or float or numpy.ndarray + Daily minimum temperature (degrees C) + vpd : int or float or numpy.ndarray + Daytime vapor pressure deficit (Pa) + par : int or float or numpy.ndarray + Daily photosynthetically active radation (MJ m-2 day-1) + lai : float or numpy.ndarray + Leaf area index, daily + tmean : float or numpy.ndarray + Mean daily temperature (degrees C) + ''' + gpp = self.daily_gpp(fpar, tmin, vpd, par) + r_leaf, r_froot = self.daily_respiration(lai, tmean) + # See MOD17 User Gudie, Equation 1.8 + return gpp - r_leaf - r_froot + + def lue(self, tmin: Number, vpd: Number) -> Number: + ''' + The instantaneous light-use efficiency (LUE), reduced by environmental + stressors (low minimum temperature, high VPD) from the maximum LUE. + + Parameters + ---------- + tmin : int or float or numpy.ndarray + vpd : int or float or numpy.ndarray + + Returns + ------- + float or numpy.ndarray + ''' + return self.LUE_max * self.tmin_scalar(tmin) * self.vpd_scalar(vpd) + + def tmin_scalar(self, x: Number) -> Number: + ''' + Parameters + ---------- + x : int or float or numpy.ndarray + Minimum temperature (deg C) + + Returns + ------- + int or float or numpy.ndarray + ''' + return linear_constraint(self.tmin0, self.tmin1)(x) + + def vpd_scalar(self, x: Number) -> Number: + ''' + The environmental scalar for vapor pressure deficit (VPD). 
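+
+        This is the reversed ramp `linear_constraint(self.vpd0, self.vpd1,
+        form = 'reversed')`: 1 at or below `vpd0` and 0 at or above `vpd1`.
+        For example, with illustrative bounds `vpd0 = 650` Pa and
+        `vpd1 = 4000` Pa, a VPD of 2325 Pa gives a scalar of 0.5.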
+ + Parameters + ---------- + x : int or float or numpy.ndarray + Vapor pressure deficit (Pa) + + Returns + ------- + int or float or numpy.ndarray + ''' + return linear_constraint(self.vpd0, self.vpd1, form = 'reversed')(x) + + +def linear_constraint( + xmin: Number, xmax: Number, form: str = None) -> Callable: + ''' + Returns a linear ramp function, for deriving a value on [0, 1] from + an input value `x`: + + if x >= xmax: + return 1 + if x <= xmin: + return 0 + return (x - xmin) / (xmax - xmin) + + Parameters + ---------- + xmin : int or float + Lower bound of the linear ramp function + xmax : int or float + Upper bound of the linear ramp function + form : str + Type of ramp function: "reversed" decreases as x increases; + "binary" returns xmax when x == 1; default (None) is increasing + as x increases. + + Returns + ------- + function + ''' + assert form is None or form in ('reversed', 'binary'),\ + 'Argument "form" must be None or one of: "reversed", "binary"' + assert form == 'binary' or np.any(xmax >= xmin),\ + 'xmax must be greater than/ equal to xmin' + if form == 'reversed': + return lambda x: np.where(x >= xmax, 0, + np.where(x < xmin, 1, 1 - np.divide( + np.subtract(x, xmin), xmax - xmin))) + if form == 'binary': + return lambda x: np.where(x == 1, xmax, xmin) + return lambda x: np.where(x >= xmax, 1, + np.where(x < xmin, 0, + np.divide(np.subtract(x, xmin), xmax - xmin))) + + +def suppress_warnings(func): + 'Decorator to suppress NumPy warnings' + def inner(*args, **kwargs): + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + return func(*args, **kwargs) + return inner diff --git a/mod17/calibration.py b/mod17/calibration.py new file mode 100644 index 0000000..f609a39 --- /dev/null +++ b/mod17/calibration.py @@ -0,0 +1,1265 @@ +''' +Calibration of MOD17 against a representative, global eddy covariance (EC) +flux tower network. The model calibration is based on Markov-Chain Monte +Carlo (MCMC). Example use: + + python calibration.py tune-gpp --pft=1 + +The general calibration protocol used here involves: + +1. Check how well the chain(s) are mixing by running short: +`python calibration.py tune-gpp 1 --draws=2000` +2. If any chain is "sticky," run a short chain while tuning the jump scale: +`python calibration.py tune-gpp 1 --tune=scaling --draws=2000` +3. Using the trace plot from Step (2) as a reference, experiment with +different jump scales to try and achieve the same (optimal) mixing when +tuning on `lambda` (default) instead, e.g.: +`python calibration.py tune-gpp 1 --scaling=1e-2 --draws=2000` +4. When the right jump scale is found, run a chain at the desired length. + +Once a good mixture is obtained, it is necessary to prune the samples to +eliminate autocorrelation, e.g., in Python: + + sampler = MOD17StochasticSampler(...) + sampler.plot_autocorr(burn = 1000, thin = 10) + trace = sampler.get_trace(burn = 1000, thin = 10) + +A thinned posterior can be exported from the command line: + + $ python calibration.py export-bplut output.csv --burn=1000 --thin=10 + +References: + + Madani, N., Kimball, J. S., & Running, S. W. (2017). + "Improving global gross primary productivity estimates by computing + optimum light use efficiencies using flux tower data." + Journal of Geophysical Research: Biogeosciences, 122(11), 2939–2951. 
+''' + +import datetime +import json +import os +import warnings +import numpy as np +import h5py +import arviz as az +import pymc as pm +import aesara.tensor as at +import mod17 +from functools import partial +from multiprocessing import get_context, set_start_method +from numbers import Number +from typing import Callable, Sequence +from pathlib import Path +from matplotlib import pyplot +from scipy import signal +from mod17 import MOD17, PFT_VALID +from mod17.utils import pft_dominant, restore_bplut, write_bplut, rmsd + +MOD17_DIR = os.path.dirname(mod17.__file__) +# This matplotlib setting prevents labels from overplotting +pyplot.rcParams['figure.constrained_layout.use'] = True + + +class BlackBoxLikelihood(at.Op): + ''' + A custom Theano operator that calculates the "likelihood" of model + parameters; it takes a vector of values (the parameters that define our + model) and returns a single "scalar" value (the log-likelihood). + + Parameters + ---------- + model : Callable + An arbitrary "black box" function that takes two arguments: the + model parameters ("params") and the forcing data ("x") + observed : numpy.ndarray + The "observed" data that our log-likelihood function takes in + x : numpy.ndarray or None + The forcing data (input drivers) that our model requires, or None + if no driver data are required + weights : Sequence or None + Optional sequence of weights applied to the model residuals (as in + weighted least squares) + objective : str + Name of the objective (or "loss") function to use, one of + ('rmsd', 'gaussian', 'kge'); defaults to "rmsd" + ''' + itypes = [at.dvector] # Expects a vector of parameter values when called + otypes = [at.dscalar] # Outputs a single scalar value (the log likelihood) + + def __init__( + self, model: Callable, observed: Sequence, x: Sequence = None, + weights: Sequence = None, objective: str = 'rmsd'): + ''' + Initialise the Op with various things that our log-likelihood function + requires. The observed data ("observed") and drivers ("x") must be + stored on the instance so the Theano Op can work seamlessly. + ''' + self.model = model + self.observed = observed + self.x = x + self.weights = weights + if objective in ('rmsd', 'rmse'): + self._loglik = self.loglik + elif objective == 'gaussian': + self._loglik = self.loglik_gaussian + elif objective == 'kge': + self._loglik = self.loglik_kge + else: + raise ValueError('Unknown "objective" function specified') + + def loglik( + self, params: Sequence, observed: Sequence, + x: Sequence = None) -> Number: + ''' + Pseudo-log likelihood, based on the root-mean squared deviation + (RMSD). The sign of the RMSD is forced to be negative so as to allow + for maximization of this objective function. + + Parameters + ---------- + params : Sequence + One or more model parameters + observed : Sequence + The observed values + x : Sequence or None + Input driver data + + Returns + ------- + Number + The (negative) root-mean squared deviation (RMSD) between the + predicted and observed values + ''' + predicted = self.model(params, *x) + if self.weights is not None: + return -np.sqrt( + np.nanmean(((predicted - observed) * self.weights) ** 2)) + return -np.sqrt(np.nanmean(((predicted - observed)) ** 2)) + + def loglik_gaussian( + self, params: Sequence, observed: Sequence, + x: Sequence = None) -> Number: + ''' + Gaussian log-likelihood, assuming independent, identically distributed + observations. 
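+
+        NOTE: The observation-error standard deviation, `sigma`, is read from
+        the last element of `params`, so the parameter vector is expected to
+        include one additional (trailing) element when this objective is used.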
+ + Parameters + ---------- + params : Sequence + One or more model parameters + observed : Sequence + The observed values + x : Sequence or None + Input driver data + + Returns + ------- + Number + The (negative) log-likelihood + ''' + predicted = self.model(params, *x) + sigma = params[-1] + # Gaussian log-likelihood; + # -\frac{N}{2}\,\mathrm{log}(2\pi\hat{\sigma}^2) + # - \frac{1}{2\hat{\sigma}^2} \sum (\hat{y} - y)^2 + return -0.5 * np.log(2 * np.pi * sigma**2) - (0.5 / sigma**2) *\ + np.nansum((predicted - observed)**2) + + def loglik_kge( + self, params: Sequence, observed: Sequence, + x: Sequence = None) -> Number: + r''' + Kling-Gupta efficiency. + + $$ + KGE = 1 - \sqrt{(r - 1)^2 + (\alpha - 1)^2 + (\beta - 1)^2} + $$ + + Parameters + ---------- + params : Sequence + One or more model parameters + observed : Sequence + The observed values + x : Sequence or None + Input driver data + + Returns + ------- + Number + The Kling-Gupta efficiency + ''' + predicted = self.model(params, *x) + r = np.corrcoef(predicted, observed)[0, 1] + alpha = np.std(predicted) / np.std(observed) + beta = np.sum(predicted) / np.sum(observed) + return 1 - np.sqrt((r - 1)**2 + (alpha - 1)**2 + (beta - 1)**2) + + def perform(self, node, inputs, outputs): + ''' + The method that is used when calling the Op. + + Parameters + ---------- + node + inputs : Sequence + outputs : Sequence + ''' + (params,) = inputs + logl = self._loglik(params, self.observed, self.x) + outputs[0][0] = np.array(logl) # Output the log-likelihood + + +class AbstractSampler(object): + ''' + Generic algorithm for fitting a model to data based on observed values + similar to what we can produce with our model. Not intended to be called + directly. + ''' + + def get_posterior(self, thin: int = 1) -> np.ndarray: + ''' + Returns a stacked posterior array, with optional thinning, combining + all the chains together. + + Parameters + ---------- + thin : int + + Returns + ------- + numpy.ndarray + ''' + trace = az.from_netcdf(self.backend) + return np.stack([ # i.e., get every ith element, each chain + trace['posterior'][p].values[:,::thin].ravel() + for p in self.required_parameters[self.name] + ], axis = -1) + + def get_trace( + self, thin: int = None, burn: int = None + ) -> az.data.inference_data.InferenceData: + ''' + Extracts the trace from the backend data store. + + Parameters + ---------- + thin : int + Thinning rate + burn : int + The burn-in (i.e., first N samples to discard) + ''' + trace = az.from_netcdf(self.backend) + if thin is None and burn is None: + return trace + return trace.sel(draw = slice(burn, None, thin)) + + def plot_autocorr(self, thin: int = None, burn: int = None, **kwargs): + ''' + Auto-correlation plot for an MCMC sample. + + Parameters + ---------- + thin : int + Thinning rate + burn : int + The burn-in (i.e., first N samples to discard) + **kwargs + Additional keyword arguments to `arviz.plot_autocorr()`. + ''' + assert os.path.exists(self.backend),\ + 'Could not find file backend!' + trace = az.from_netcdf(self.backend) + kwargs.setdefault('combined', True) + if thin is None: + az.plot_autocorr(trace, **kwargs) + else: + burn = 0 if burn is None else burn + az.plot_autocorr( + trace.sel(draw = slice(burn, None, thin))['posterior'], + **kwargs) + pyplot.show() + + def plot_forest(self, **kwargs): + ''' + Forest plot for an MCMC sample. 
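+
+        Any keyword arguments are passed through to `arviz.plot_forest()`.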
+ + In particular: + + - `hdi_prob`: A float indicating the highest density interval (HDF) to + plot + ''' + assert os.path.exists(self.backend),\ + 'Could not find file backend!' + trace = az.from_netcdf(self.backend) + az.plot_forest(trace, **kwargs) + pyplot.show() + + def plot_pair(self, **kwargs): + ''' + Paired variables plot for an MCMC sample. + + Parameters + ---------- + **kwargs + Additional keyword arguments to `arviz.plot_pair()`. + ''' + assert os.path.exists(self.backend),\ + 'Could not find file backend!' + trace = az.from_netcdf(self.backend) + az.plot_pair(trace, **kwargs) + pyplot.show() + + def plot_partial_score( + self, observed: Sequence, drivers: Sequence, fit: dict = None): + ''' + Plots the "partial effect" of a single parameter: the score of the + model at that parameter's current value against a sweep of possible + parameter values. All other parameters are held fixed at the best-fit + values. + + Parameters + ---------- + observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + drivers : list or tuple + Sequence of driver datasets to be supplied, in order, to the + model's run function + fit : dict or None + The best-fit parameter values used for those parameters that are + fixed + ''' + trace = az.from_netcdf(self.backend) + if fit is None: + # Mean of posterior are "best fit" values + fit = trace['posterior'].mean() + fit_params = list(filter( + lambda p: p in fit, self.required_parameters[self.name])) + # The NPP model depends on constants not included in the fit + constants = [] + if self.name == 'NPP': + constants = [ + self.params[p] + for p in ['LUE_max', 'tmin0', 'tmin1', 'vpd0', 'vpd1'] + ] + n = len(fit_params) + nrow = n + ncol = 1 + if n > 4: + nrow = 2 + ncol = n - (n // 2) + fig, axes = pyplot.subplots( + nrow, ncol, figsize = (n * 2, n), sharey = True) + i = 0 + for j in range(nrow): + for k in range(ncol): + if i >= n: + break + free_param = fit_params[i] + fixed = np.stack([ + fit[p].values for p in fit_params + ])[np.newaxis,:].repeat(30, axis = 0) + sweep = np.linspace( + trace['posterior'][free_param].min(), + trace['posterior'][free_param].max(), num = 30) + fixed[:,i] = sweep + # Need to concatenate GPP parameters at begining of fixed + scores = -np.array(self.score_posterior( + observed, drivers, [ + [*constants, *f] for f in fixed.tolist() + ])) + axes[j,k].plot(sweep, scores, 'k-') + axes[j,k].axvline( + fit[free_param], color = 'red', linestyle = 'dashed', + label = 'Posterior Mean') + axes[j,k].set_xlabel(free_param) + axes[j,k].set_title(free_param) + axes[j,k].legend() + i += 1 + # Delete the last empty subplot + if n % 2 != 0: + fig.delaxes(axes.flatten()[-1]) + axes[0, 0].set_ylabel('Score') + pyplot.tight_layout() + pyplot.show() + + def plot_posterior(self, **kwargs): + ''' + Plots the posterior distribution for an MCMC sample. + + Parameters + ---------- + **kwargs + Additional keyword arguments to `arviz.plot_posterior()`. + ''' + assert os.path.exists(self.backend),\ + 'Could not find file backend!' + trace = az.from_netcdf(self.backend) + az.plot_posterior(trace, **kwargs) + pyplot.show() + + def score_posterior( + self, observed: Sequence, drivers: Sequence, posterior: Sequence, + method: str = 'rmsd') -> Number: + ''' + Returns a goodness-of-fit score based on the existing calibration. 
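+
+        For example, assuming a sampler whose chains have already been run,
+        where `tower_gpp` and `drivers` are the observed fluxes and driver
+        datasets used in calibration:
+
+            posterior = sampler.get_posterior(thin = 10)
+            scores = sampler.score_posterior(tower_gpp, drivers, posterior.tolist())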
+ + Parameters + ---------- + observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + drivers : list or tuple + Sequence of driver datasets to be supplied, in order, to the + model's run function + posterior : list or tuple + Sequence of posterior parameter sets (i.e., nested sequence); each + nested sequence will be scored + method : str + The method for generating a goodness-of-git score + (Default: "rmsd") + + Returns + ------- + float + ''' + if method != 'rmsd': + raise NotImplementedError('"method" must be one of: "rmsd"') + score_func = partial( + rmsd, func = self.model, observed = observed, drivers = drivers) + with get_context('spawn').Pool() as pool: + scores = pool.map(score_func, posterior) + return scores + + +class StochasticSampler(AbstractSampler): + ''' + A Markov Chain-Monte Carlo (MCMC) sampler for an arbitrary model. The + specific sampler used is the Differential Evolution (DE) MCMC algorithm + described by Ter Braak (2008), though the implementation is specific to + the PyMC3 library. + + NOTE: The `model` (a function) should be named "_name" where "name" is + some uniquely identifiable model name. This helps `StochasticSampler.run()` + to find the correct compiler for the model. The compiler function should + be named `compiled_name_model()` (where "name" is the model name) and be + defined on a subclass of `StochasticSampler`. + + Parameters + ---------- + config : dict + Dictionary of configuration parameters + model : Callable + The function to call (with driver data and parameters); this function + should take driver data as positional arguments and the model + parameters as a `*Sequence`; it should require no external state. 
+ observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + params_dict : dict or None + Dictionary of model parameters, to be used as initial values and as + the basis for constructing a new dictionary of optimized parameters + backend : str or None + Path to a NetCDF4 file backend (Default: None) + weights : Sequence or None + Optional sequence of weights applied to the model residuals (as in + weighted least squares) + ''' + def __init__( + self, config: dict, model: Callable, params_dict: dict = None, + backend: str = None, weights: Sequence = None, + model_name: str = None): + self.backend = backend + # Convert the BOUNDS into nested dicts for easy use + self.bounds = dict([ + (key, dict([('lower', b[0]), ('upper', b[1])])) + for key, b in config['optimization']['bounds'].items() + ]) + self.config = config + self.model = model + if hasattr(model, '__name__'): + self.name = model.__name__.strip('_').upper() # "_gpp" = "GPP" + else: + self.name = model_name + self.params = params_dict + # Set the model's prior distribution assumptions + self.prior = dict() + for key in self.required_parameters[self.name]: + # NOTE: This is the default only for LUE_max; other parameters, + # with Uniform distributions, don't use anything here + self.prior.setdefault(key, { + 'mu': params_dict[key], + 'sigma': 2e-4 + }) + self.weights = weights + assert os.path.exists(os.path.dirname(backend)) + + def run( + self, observed: Sequence, drivers: Sequence, + draws = 1000, chains = 3, tune = 'lambda', + scaling: float = 1e-3, prior: dict = dict(), + check_shape: bool = False, save_fig: bool = False, + var_names: Sequence = None) -> None: + ''' + Fits the model using DE-MCMCz approach. `tune="lambda"` (default) is + recommended; lambda is related to the scale of the jumps learned from + other chains, but epsilon ("scaling") controls the scale directly. + Using a larger value for `scaling` (Default: 1e-3) will produce larger + jumps and may directly address "sticky" chains. + + Parameters + ---------- + observed : Sequence + The observed data the model will be calibrated against + drivers : list or tuple + Sequence of driver datasets to be supplied, in order, to the + model's run function + draws : int + Number of samples to draw (on each chain); defaults to 1000 + chains : int + Number of chains; defaults to 3 + tune : str or None + Which hyperparameter to tune: Defaults to 'lambda', but can also + be 'scaling' or None. + scaling : float + Initial scale factor for epsilon (Default: 1e-3) + prior : dict + check_shape : bool + True to require that input driver datasets have the same shape as + the observed values (Default: False) + save_fig : bool + True to save figures to files instead of showing them + (Default: False) + var_names : Sequence + One or more variable names to show in the plot + ''' + assert not check_shape or drivers[0].shape == observed.shape,\ + 'Driver data should have the same shape as the "observed" data' + assert len(drivers) == len(self.required_drivers[self.name]),\ + 'Did not receive expected number of driver datasets!' 
+ assert tune in ('lambda', 'scaling') or tune is None + # Update prior assumptions + self.prior.update(prior) + # Generate an initial goodness-of-fit score + predicted = self.model([ + self.params[p] for p in self.required_parameters[self.name] + ], *drivers) + if self.weights is not None: + score = np.sqrt( + np.nanmean(((predicted - observed) * self.weights) ** 2)) + else: + score = np.sqrt(np.nanmean(((predicted - observed)) ** 2)) + print('-- RMSD at the initial point: %.3f' % score) + print('Compiling model...') + try: + compiler = getattr(self, 'compile_%s_model' % self.name.lower()) + except AttributeError: + raise AttributeError('''Could not find a compiler for model named + "%s"; make sure that a function "compile_%s_model()" is defined on + this class''' % (model_name, model_name)) + with compiler(observed, drivers) as model: + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + step_func = pm.DEMetropolisZ(tune = tune, scaling = scaling) + trace = pm.sample( + draws = draws, step = step_func, cores = chains, + chains = chains, idata_kwargs = {'log_likelihood': True}) + if self.backend is not None: + print('Writing results to file...') + trace.to_netcdf(self.backend) + if var_names is None: + az.plot_trace(trace, var_names = ['~log_likelihood']) + else: + az.plot_trace(trace, var_names = var_names) + if save_fig: + pyplot.savefig('.'.join(self.backend.split('.')[:-1]) + '.png') + else: + pyplot.show() + + +class MOD17StochasticSampler(StochasticSampler): + ''' + A Markov Chain-Monte Carlo (MCMC) sampler for MOD17. The specific sampler + used is the Differential Evolution (DE) MCMC algorithm described by + Ter Braak (2008), though the implementation is specific to the PyMC3 + library. + + Considerations: + + 1. Tower GPP is censored when values are < 0 or when APAR is + < 0.1 MJ m-2 d-1. + + Parameters + ---------- + config : dict + Dictionary of configuration parameters + model : Callable + The function to call (with driver data and parameters); this function + should take driver data as positional arguments and the model + parameters as a `*Sequence`; it should require no external state. + observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + params_dict : dict or None + Dictionary of model parameters, to be used as initial values and as + the basis for constructing a new dictionary of optimized parameters + backend : str or None + Path to a NetCDF4 file backend (Default: None) + weights : Sequence or None + Optional sequence of weights applied to the model residuals (as in + weighted least squares) + ''' + # NOTE: This is different than for mod17.MOD17 because we haven't yet + # figured out how the respiration terms are calculated + required_parameters = { + 'GPP': ['LUE_max', 'tmin0', 'tmin1', 'vpd0', 'vpd1'], + 'NPP': MOD17.required_parameters + } + required_drivers = { + 'GPP': ['fPAR', 'Tmin', 'VPD', 'PAR'], + 'NPP': ['fPAR', 'Tmin', 'VPD', 'PAR', 'LAI', 'Tmean', 'years'] + } + + def compile_gpp_model( + self, observed: Sequence, drivers: Sequence) -> pm.Model: + ''' + Creates a new GPP model based on the prior distribution. Model can be + re-compiled multiple times, e.g., for cross validation. 
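+
+        This method is normally resolved and called by `StochasticSampler.run()`,
+        which looks up `compile_<name>_model()` from the model's name ("GPP"
+        here); a sketch of direct use:
+
+            with self.compile_gpp_model(observed, drivers) as model:
+                trace = pm.sample(draws = 1000, step = pm.DEMetropolisZ())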
+ + Parameters + ---------- + observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + drivers : list or tuple + Sequence of driver datasets to be supplied, in order, to the + model's run function + + Returns + ------- + pm.Model + ''' + # Define the objective/ likelihood function + log_likelihood = BlackBoxLikelihood( + self.model, observed, x = drivers, weights = self.weights, + objective = self.config['optimization']['objective'].lower()) + # With this context manager, "all PyMC3 objects introduced in the indented + # code block...are added to the model behind the scenes." + with pm.Model() as model: + # (Stochstic) Priors for unknown model parameters + LUE_max = pm.TruncatedNormal('LUE_max', + **self.prior['LUE_max'], **self.bounds['LUE_max']) + # NOTE: tmin0, vpd0 are fixed at Collection 6.1 values + tmin0 = self.params['tmin0'] + tmin1 = pm.Uniform('tmin1', **self.bounds['tmin1']) + # NOTE: Upper bound on `vpd1` is set by the maximum observed VPD + vpd0 = self.params['vpd0'] + vpd1 = pm.Uniform('vpd1', + lower = self.bounds['vpd1']['lower'], + upper = drivers[2].max().round(0)) + # Convert model parameters to a tensor vector + params_list = [LUE_max, tmin0, tmin1, vpd0, vpd1] + params = at.as_tensor_variable(params_list) + # Key step: Define the log-likelihood as an added potential + pm.Potential('likelihood', log_likelihood(params)) + return model + + def compile_npp_model( + self, observed: Sequence, drivers: Sequence) -> pm.Model: + ''' + Creates a new NPP model based on the prior distribution. Model can be + re-compiled multiple times, e.g., for cross validation. + + Parameters + ---------- + observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + drivers : list or tuple + Sequence of driver datasets to be supplied, in order, to the + model's run function + + Returns + ------- + pm.Model + ''' + # Define the objective/ likelihood function + log_likelihood = BlackBoxLikelihood( + self.model, observed, x = drivers, weights = self.weights, + objective = self.config['optimization']['objective'].lower()) + # With this context manager, "all PyMC3 objects introduced in the indented + # code block...are added to the model behind the scenes." 
+ with pm.Model() as model: + # Setting GPP parameters that are known--EXCEPT tmin1 + LUE_max = self.params['LUE_max'] + tmin0 = self.params['tmin0'] + tmin1 = self.params['tmin1'] + vpd0 = self.params['vpd0'] + vpd1 = self.params['vpd1'] + # SLA fixed at prior mean + SLA = np.exp(self.prior['SLA']['mu']) + # Allometry ratios prescribe narrow range around Collection 6.1 values + froot_leaf_ratio = pm.Triangular( + 'froot_leaf_ratio', **self.prior['froot_leaf_ratio']) + # (Stochstic) Priors for unknown model parameters + Q10_froot = pm.TruncatedNormal( + 'Q10_froot', **self.prior['Q10_froot'], **self.bounds['Q10']) + leaf_mr_base = pm.LogNormal( + 'leaf_mr_base', **self.prior['leaf_mr_base']) + froot_mr_base = pm.LogNormal( + 'froot_mr_base', **self.prior['froot_mr_base']) + # For GRS and CRO, livewood mass and respiration are zero + if np.equal(list(self.prior['livewood_mr_base'].values()), [0, 0]).all(): + livewood_leaf_ratio = 0 + livewood_mr_base = 0 + Q10_livewood = 0 + else: + livewood_leaf_ratio = pm.Triangular( + 'livewood_leaf_ratio', **self.prior['livewood_leaf_ratio']) + livewood_mr_base = pm.LogNormal( + 'livewood_mr_base', **self.prior['livewood_mr_base']) + Q10_livewood = pm.TruncatedNormal( + 'Q10_livewood', **self.prior['Q10_livewood'], + **self.bounds['Q10']) + # Convert model parameters to a tensor vector + params_list = [ + LUE_max, tmin0, tmin1, vpd0, vpd1, SLA, + Q10_livewood, Q10_froot, froot_leaf_ratio, livewood_leaf_ratio, + leaf_mr_base, froot_mr_base, livewood_mr_base + ] + params = at.as_tensor_variable(params_list) + # Key step: Define the log-likelihood as an added potential + pm.Potential('likelihood', log_likelihood(params)) + return model + + +class CalibrationAPI(object): + ''' + Convenience class for calibrating the MOD17 GPP and NPP models. Meant to + be used with `fire.Fire()`. + ''' + def __init__(self, config = None): + config_file = config + if config_file is None: + config_file = os.path.join( + MOD17_DIR, 'data/MOD17_calibration_config.json') + with open(config_file, 'r') as file: + self.config = json.load(file) + self.hdf5 = self.config['data']['file'] + + def _clean(self, raw: Sequence, drivers: Sequence, protocol: str = 'GPP'): + 'Cleans up data values according to a prescribed protocol' + if protocol == 'GPP': + # Filter out observed GPP values when GPP is negative or when + # APAR < 0.1 g C m-2 day-1 + apar = drivers['fPAR'] * drivers['PAR'] + return np.where( + apar < 0.1, np.nan, np.where(raw < 0, np.nan, raw)) + + def _filter(self, raw: Sequence, size: int): + 'Apply a smoothing filter with zero phase offset' + if size > 1: + window = np.ones(size) / size + return np.apply_along_axis( + lambda x: signal.filtfilt(window, np.ones(1), x), 0, raw) + return raw # Or, revert to the raw data + + def clean_observed( + self, raw: Sequence, drivers: Sequence, driver_labels: Sequence, + protocol: str = 'GPP', filter_length: int = 2) -> Sequence: + ''' + Cleans observed tower flux data according to a prescribed protocol. 
+ + - For GPP data: Removes observations where GPP < 0 or where APAR is + < 0.1 MJ m-2 day-1 + + Parameters + ---------- + raw : Sequence + drivers : Sequence + driver_labels : Sequence + protocol : str + filter_length : int + The window size for the smoothing filter, applied to the observed + data + + Returns + ------- + Sequence + ''' + if protocol != 'GPP': + raise NotImplementedError('"protocol" must be one of: "GPP"') + # Read in the observed data and apply smoothing filter + obs = self._filter(raw, filter_length) + obs = self._clean(obs, dict([ + (driver_labels[i], data) + for i, data in enumerate(drivers) + ]), protocol = 'GPP') + return obs + + def export_bplut( + self, model: str, output_path: str, thin: int = 10, + burn: int = 1000): + ''' + Export the BPLUT using the posterior mean from the MCMC sampler. NOTE: + The posterior mean is usually not the best estimate for poorly + identified parameters. + + Parameters + ---------- + model : str + The name of the model ("GPP" or "NPP") + output_path : str + The output CSV file path + thin : int + Thinning rate + burn : int + The burn-in (i.e., first N samples to discard) + ''' + params_dict = restore_bplut(self.config['BPLUT'][model]) + bplut = params_dict.copy() + # Filter the parameters to just those for the PFT of interest + for pft in PFT_VALID: + backend = self.config['optimization']['backend_template'] %\ + (model, pft) + params = dict([(k, v[pft]) for k, v in params_dict.items()]) + sampler = MOD17StochasticSampler( + self.config, getattr(MOD17, '_%s' % model.lower()), params, + backend = backend) + trace = sampler.get_trace() + fit = trace.sel( + draw = slice(burn, None, thin))['posterior'].mean() + for each in MOD17.required_parameters: + try: + bplut[each][pft] = float(fit[each]) + except KeyError: + continue + write_bplut(bplut, output_path) + + def export_posterior( + self, model: str, param: str, output_path: str, thin: int = 10, + burn: int = 1000, k_folds: int = 1): + ''' + Exports posterior distribution for a parameter, for each PFT to HDF5. 
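+
+        For example, from the command line (file names are placeholders):
+
+            python calibration.py export-posterior GPP LUE_max output.h5 --burn=1000 --thin=10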
+ + Parameters + ---------- + model : str + The name of the model ("GPP" or "NPP") + param : str + The model parameter to export + output_path : str + The output HDF5 file path + thin : int + Thinning rate + burn : int + The burn-in (i.e., first N samples to discard) + k_folds : int + The number of k-folds used in cross-calibration/validation; + if more than one (default), the folds for each PFT will be + combined into a single HDF5 file + ''' + params_dict = restore_bplut(self.config['BPLUT'][model]) + bplut = params_dict.copy() + # Filter the parameters to just those for the PFT of interest + post = [] + for pft in PFT_VALID: + params = dict([(k, v[pft]) for k, v in params_dict.items()]) + backend = self.config['optimization']['backend_template'] %\ + (model, pft) + post_by_fold = [] + for fold in range(1, k_folds + 1): + if k_folds > 1: + backend = self.config['optimization']['backend_template'] %\ + (f'{model}-k{fold}', pft) + sampler = MOD17StochasticSampler( + self.config, getattr(MOD17, '_%s' % model.lower()), params, + backend = backend) + trace = sampler.get_trace() + fit = trace.sel(draw = slice(burn, None, thin))['posterior'] + num_samples = fit.sizes['chain'] * fit.sizes['draw'] + if param in fit: + post_by_fold.append( + az.extract_dataset(fit, combined = True)[param].values) + else: + # In case there is, e.g., a parameter that takes on a + # constant value for a specific PFT + if k_folds > 1: + post_by_fold.append(np.ones((1, num_samples)) * np.nan) + else: + post_by_fold.append(np.ones(num_samples) * np.nan) + if k_folds > 1: + post.append(np.vstack(post_by_fold)) + else: + post.extend(post_by_fold) + # If not every PFT's posterior has the same number of samples (e.g., + # when one set of chains was run longer than another)... + if not all([p.shape == post[0].shape for p in post]): + max_len = max([p.shape for p in post])[0] + # ...Reshape all posteriors to match the greatest sample size + import ipdb + ipdb.set_trace()#FIXME + post = [ + np.pad( + p.astype(np.float32), (0, max_len - p.size), + mode = 'constant', constant_values = (np.nan,)) + for p in post + ] + with h5py.File(output_path, 'a') as hdf: + post = np.stack(post) + ts = datetime.date.today().strftime('%Y-%m-%d') # Today's date + dataset = hdf.create_dataset( + f'{param}_posterior', post.shape, np.float32, post) + dataset.attrs['description'] = 'CalibrationAPI.export_posterior() on {ts}' + + def export_likely_posterior( + self, model: str, param: str, output_path: str, thin: int = 10, + burn: int = 1000, ptile: int = 95): + ''' + Exports posterior distribution for a parameter based on likelihood + + Parameters + ---------- + model : str + The name of the model ("GPP" or "NPP") + param : str + The model parameter to export + output_path : str + The output HDF5 file path + thin : int + Thinning rate + burn : int + The burn-in (i.e., first N samples to discard) + ptile : int + The percentile cutoff for likelihood; only samples at or above + this likelihood cutoff will be included + ''' + params_dict = restore_bplut(self.config['BPLUT'][model]) + bplut = params_dict.copy() + # Filter the parameters to just those for the PFT of interest + post = [] + likelihood = [] + for pft in PFT_VALID: + backend = self.config['optimization']['backend_template'] % (model, pft) + params = dict([(k, v[pft]) for k, v in params_dict.items()]) + sampler = MOD17StochasticSampler( + self.config, getattr(MOD17, '_%s' % model.lower()), params, + backend = backend) + trace = sampler.get_trace() + fit = trace.sel(draw = slice(burn, None, 
thin)) + # Find the likelihood value associated with the cutoff percentile + ll = az.extract_dataset( + fit, combined = True)['log_likelihood'].values + values = az.extract_dataset(fit, combined = True)[param].values + cutoff = np.percentile(ll, ptile) + post.append(values[ll >= cutoff]) + likelihood.append(ll[ll >= cutoff]) + with h5py.File(output_path, 'a') as hdf: + n = max([len(p) for p in post]) + # Make sure all arrays are the same size + post = np.stack([ + np.concatenate((p, np.full((n - len(p),), np.nan))) + for p in post + ]) + likelihood = np.stack([ + np.concatenate((p, np.full((n - len(p),), np.nan))) + for p in likelihood + ]) + ts = datetime.date.today().strftime('%Y-%m-%d') # Today's date + dataset = hdf.create_dataset( + f'{param}_posterior', post.shape, np.float32, post) + dataset.attrs['description'] = 'CalibrationAPI.export_likely_posterior() on {ts}' + dataset = hdf.create_dataset( + f'{param}_likelihood', likelihood.shape, np.float32, likelihood) + + def tune_gpp( + self, pft: int, plot_trace: bool = False, ipdb: bool = False, + save_fig: bool = False, **kwargs): + ''' + Run the MOD17 GPP calibration. + + Parameters + ---------- + pft : int + The Plant Functional Type (PFT) to calibrate + plot_trace : bool + True to plot the trace for a previous calibration run; this will + also NOT start a new calibration (Default: False) + ipdb : bool + True to drop the user into an ipdb prompt, prior to and instead of + running calibration + save_fig : bool + True to save figures to files instead of showing them + (Default: False) + **kwargs + Additional keyword arguments passed to + `MOD17StochasticSampler.run()` + ''' + assert pft in PFT_VALID, f'Invalid PFT: {pft}' + # Pass configuration parameters to MOD17StochasticSampler.run() + for key in ('chains', 'draws', 'tune', 'scaling'): + if key in self.config['optimization'].keys() and not key in kwargs.keys(): + kwargs[key] = self.config['optimization'][key] + # Filter the parameters to just those for the PFT of interest + params_dict = restore_bplut(self.config['BPLUT']['GPP']) + # Load blacklisted sites (if any) + blacklist = self.config['data']['sites_blacklisted'] + params_dict = dict([(k, v[pft]) for k, v in params_dict.items()]) + model = MOD17(params_dict) + objective = self.config['optimization']['objective'].lower() + + print('Loading driver datasets...') + with h5py.File(self.hdf5, 'r') as hdf: + sites = hdf['FLUXNET/site_id'][:] + if hasattr(sites[0], 'decode'): + sites = list(map(lambda x: x.decode('utf-8'), sites)) + # Get dominant PFT + pft_map = pft_dominant(hdf['state/PFT'][:], site_list = sites) + # Blacklist various sites + pft_mask = np.logical_and(pft_map == pft, ~np.in1d(sites, blacklist)) + # NOTE: Converting from Kelvin to Celsius + tday = hdf['MERRA2/T10M_daytime'][:][:,pft_mask] - 273.15 + qv10m = hdf['MERRA2/QV10M_daytime'][:][:,pft_mask] + ps = hdf['MERRA2/PS_daytime'][:][:,pft_mask] + drivers = [ # fPAR, Tmin, VPD, PAR + hdf['MODIS/MOD15A2HGF_fPAR_interp'][:][:,pft_mask], + hdf['MERRA2/Tmin'][:][:,pft_mask] - 273.15, + MOD17.vpd(qv10m, ps, tday), + MOD17.par(hdf['MERRA2/SWGDN'][:][:,pft_mask]), + ] + # Convert fPAR from (%) to [0,1] + drivers[0] = np.nanmean(drivers[0], axis = -1) / 100 + # If RMSE is used, then we want to pay attention to weighting + weights = None + if objective in ('rmsd', 'rmse'): + weights = hdf['weights'][pft_mask][np.newaxis,:]\ + .repeat(tday.shape[0], axis = 0) + # Check that driver data do not contain NaNs + for d, each in enumerate(drivers): + name = ('fPAR', 'Tmin', 'VPD', 
'PAR')[d] + assert not np.isnan(each).any(),\ + f'Driver dataset "{name}" contains NaNs' + tower_gpp = hdf['FLUXNET/GPP'][:][:,pft_mask] + # Read the validation mask; mask out observations that are + # reserved for validation + print('Masking out validation data...') + mask = hdf['FLUXNET/validation_mask'][pft] + tower_gpp[mask] = np.nan + + # Clean observations, then mask out driver data where the are no + # observations + tower_gpp = self.clean_observed( + tower_gpp, drivers, MOD17StochasticSampler.required_drivers['GPP'], + protocol = 'GPP') + if weights is not None: + weights = weights[~np.isnan(tower_gpp)] + for d, _ in enumerate(drivers): + drivers[d] = drivers[d][~np.isnan(tower_gpp)] + tower_gpp = tower_gpp[~np.isnan(tower_gpp)] + + print('Initializing sampler...') + backend = self.config['optimization']['backend_template'] % ('GPP', pft) + sampler = MOD17StochasticSampler( + self.config, MOD17._gpp, params_dict, backend = backend, + weights = weights) + if plot_trace or ipdb: + if ipdb: + import ipdb + ipdb.set_trace() + trace = sampler.get_trace() + az.plot_trace(trace, var_names = MOD17.required_parameters[0:5]) + pyplot.show() + return + # Get (informative) priors for just those parameters that have them + with open(self.config['optimization']['prior'], 'r') as file: + prior = json.load(file) + prior_params = filter( + lambda p: p in prior.keys(), sampler.required_parameters['GPP']) + prior = dict([ + (p, {'mu': prior[p]['mu'][pft], 'sigma': prior[p]['sigma'][pft]}) + for p in prior_params + ]) + sampler.run( + tower_gpp, drivers, prior = prior, save_fig = save_fig, **kwargs) + + def tune_npp( + self, pft: int, plot_trace: bool = False, ipdb: bool = False, + save_fig: bool = False, climatology = False, + cutoff: Number = 2385, k_folds: int = 1, **kwargs): + r''' + Run the MOD17 NPP calibration. If k-folds cross-validation is used, + the model is calibrated on $k$ random subsets of the data and a + series of file is created, e.g., as: + + MOD17_NPP_calibration_PFT1.h5 + MOD17_NPP-k1_calibration_PFT1.nc4 + MOD17_NPP-k2_calibration_PFT1.nc4 + ... + + Where each `.nc4` file is a standard `arviz` backend and the `.h5` + indicates which indices from the NPP observations vector, after + removing NaNs, were excluded (i.e., the indices of the test data). + + Parameters + ---------- + pft : int + The Plant Functional Type (PFT) to calibrate + plot_trace : bool + True to display the trace plot ONLY and not run calibration + (Default: False) + ipdb : bool + True to drop into an interactive Python debugger (`ipdb`) after + loading an existing trace (Default: False) + save_fig : bool + True to save the post-calibration trace plot to a file instead of + displaying it (Default: False) + climatology : bool + True to use a MERRA-2 climatology (and look for it in the drivers + file), i.e., use `MERRA2_climatology` group instead of + `surface_met_MERRA2` group (Default: False) + cutoff : Number + Maximum value of observed NPP (g C m-2 year-1); values above this + cutoff will be discarded and not used in calibration + (Default: 2385) + k_folds : int + Number of folds to use in k-folds cross-validation; defaults to + k=1, i.e., no cross-validation is performed. 
+ **kwargs + Additional keyword arguments passed to + `MOD17StochasticSampler.run()` + ''' + assert pft in PFT_VALID, f'Invalid PFT: {pft}' + prefix = 'MERRA2_climatology' if climatology else 'surface_met_MERRA2' + params_dict = restore_bplut(self.config['BPLUT']['NPP']) + # Filter the parameters to just those for the PFT of interest + params_dict = dict([(k, v[pft]) for k, v in params_dict.items()]) + model = MOD17(params_dict) + kwargs.update({'var_names': [ + '~LUE_max', '~tmin0', '~tmin1', '~vpd0', '~vpd1', '~log_likelihood' + ]}) + # Pass configuration parameters to MOD17StochasticSampler.run() + for key in ('chains', 'draws', 'tune', 'scaling'): + if key in self.config['optimization'].keys(): + kwargs[key] = self.config['optimization'][key] + print('Loading driver datasets...') + with h5py.File(self.hdf5, 'r') as hdf: + # NOTE: This is only recorded at the site-level; no need to + # determine modal PFT across subgrid + pft_map = hdf['NPP/PFT'][:] + # Leave out sites where there is no fPAR (and no LAI) data + fpar = hdf['NPP/MOD15A2H_fPAR_clim_filt'][:] + mask = np.logical_and( + pft_map == pft, ~np.isnan(np.nanmean(fpar, axis = -1))\ + .all(axis = 0)) + # NOTE: Converting from Kelvin to Celsius + tday = hdf[f'NPP/{prefix}/T10M_daytime'][:][:,mask] - 273.15 + qv10m = hdf[f'NPP/{prefix}/QV10M_daytime'][:][:,mask] + ps = hdf[f'NPP/{prefix}/PS_daytime'][:][:,mask] + drivers = [ # fPAR, Tmin, VPD, PAR, LAI, Tmean, years + hdf['NPP/MOD15A2H_fPAR_clim_filt'][:][:,mask], + hdf[f'NPP/{prefix}/Tmin'][:][:,mask] - 273.15, + MOD17.vpd(qv10m, ps, tday), + MOD17.par(hdf[f'NPP/{prefix}/SWGDN'][:][:,mask]), + hdf['NPP/MOD15A2H_LAI_clim_filt'][:][:,mask], + hdf[f'NPP/{prefix}/T10M'][:][:,mask] - 273.15, + np.full(ps.shape, 1) # i.e., A 365-day climatological year ("Year 1") + ] + observed_npp = hdf['NPP/NPP_total'][:][mask] + if cutoff is not None: + observed_npp[observed_npp > cutoff] = np.nan + # Set negative VPD to zero + drivers[2] = np.where(drivers[2] < 0, 0, drivers[2]) + # Convert fPAR from (%) to [0,1] and re-scale LAI; reshape fPAR, LAI + drivers[0] = np.nanmean(drivers[0], axis = -1) * 0.01 + drivers[4] = np.nanmean(drivers[4], axis = -1) * 0.1 + # TODO Mask out driver data where the are no observations + for d, _ in enumerate(drivers): + drivers[d] = drivers[d][:,~np.isnan(observed_npp)] + observed_npp = observed_npp[~np.isnan(observed_npp)] + + if k_folds > 1: + # Back-up the original (complete) datasets + _drivers = [d.copy() for d in drivers] + _observed_npp = observed_npp.copy() + # Randomize the indices of the NPP data + indices = np.arange(0, observed_npp.size) + np.random.shuffle(indices) + # Get the starting and ending index of each fold + fold_idx = np.array([indices.size // k_folds] * k_folds) * np.arange(0, k_folds) + fold_idx = list(map(list, zip(fold_idx, fold_idx + indices.size // k_folds))) + # Ensure that the entire dataset is used + fold_idx[-1][-1] = indices.max() + idx_test = [indices[start:end] for start, end in fold_idx] + + # Loop over each fold (or the entire dataset, if num. 
folds == 1) + for k, fold in enumerate(range(1, k_folds + 1)): + backend = self.config['optimization']['backend_template'] % ('NPP', pft) + if k_folds > 1 and fold == 1: + # Create an HDF5 file with the same name as the (original) + # netCDF4 back-end, store the test indices + with h5py.File(backend.replace('nc4', 'h5'), 'w') as hdf: + out = list(idx_test) + size = indices.size // k_folds + try: + out = np.stack(out) + except ValueError: + size = max((o.size for o in out)) + for i in range(0, len(out)): + out[i] = np.concatenate((out[i], [np.nan] * (size - out[i].size))) + hdf.create_dataset( + 'test_indices', (k_folds, size), np.int32, np.stack(out)) + # Restore the original NPP dataset + observed_npp = _observed_npp.copy() + # Set to NaN all the test indices + idx = idx_test[k] + observed_npp[idx] = np.nan + # Same for drivers, after restoring from the original + drivers = [d.copy()[:,~np.isnan(observed_npp)] for d in _drivers] + observed_npp = observed_npp[~np.isnan(observed_npp)] + # Use a different naming scheme for the backend + if k_folds > 1: + backend = self.config['optimization']['backend_template'] % (f'NPP-k{fold}', pft) + + print('Initializing sampler...') + sampler = MOD17StochasticSampler( + self.config, MOD17._npp, params_dict, backend = backend, + model_name = 'NPP') + if plot_trace or ipdb: + if ipdb: + import ipdb + ipdb.set_trace() + trace = sampler.get_trace() + az.plot_trace( + trace, var_names = MOD17.required_parameters[5:]) + pyplot.show() + return + # Get (informative) priors for just those parameters that have them + with open(self.config['optimization']['prior'], 'r') as file: + prior = json.load(file) + prior_params = filter( + lambda p: p in prior.keys(), sampler.required_parameters['NPP']) + prior = dict([ + (p, prior[p]) for p in prior_params + ]) + for key in prior.keys(): + # And make sure to subset to the chosen PFT! 
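+                # (each entry of the prior JSON maps a distribution argument,
+                # e.g., "mu"/"sigma" for Normal priors or "lower"/"c"/"upper"
+                # for Triangular priors, to an array indexed by PFT code)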
+ for arg in prior[key].keys(): + prior[key][arg] = prior[key][arg][pft] + sampler.run( + observed_npp, drivers, prior = prior, save_fig = save_fig, + **kwargs) + + +if __name__ == '__main__': + import fire + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + fire.Fire(CalibrationAPI) diff --git a/mod17/data/MOD17_BPLUT_C5.1_MERRA_NASA.csv b/mod17/data/MOD17_BPLUT_C5.1_MERRA_NASA.csv new file mode 100644 index 0000000..865b401 --- /dev/null +++ b/mod17/data/MOD17_BPLUT_C5.1_MERRA_NASA.csv @@ -0,0 +1,13 @@ +UMD_VEG_LC,ENF=0,EBF=1,DNF=2,DBF=3,MF=4,CShrub=5,OShrub=6,WSavannas=7,Savannas=8, Grass=9,Crop=10 +LUEmax(KgC/m^2/d/MJ),0.001211,0.001405,0.001227,0.001526,0.001226,0.001495,0.001027,0.001498,0.001454,0.001215,0.0013 +Tmin_min(C),-8,-8,-8,-6,-7,-8,-8,-8,-8,-8,-8 +Tmin_max(C),8.31,9.09,10.44,9.94,9.5,8.61,8.8,11.39,11.39,12.02,12.02 +VPD_min(Pa),650,1000,650,650,650,650,650,650,650,650,650 +VPD_max(Pa),3000,4000,3500,2900,2900,4300,4400,3500,3600,4200,4500 +SLA(LAI/KgC),15,26.9,16.9,24.7,22.6,9.4,12,28.8,28.9,38,38 +Q10(unitless),2,2,2,2,2,2,2,2,2,2,2 +froot_leaf_ratio,1.2,1.1,1.7,1.1,1.1,1,1.3,1.8,1.8,2.6,2 +livewood_leaf_ratio,0.182,0.162,0.165,0.203,0.203,0.079,0.04,0.091,0.051,0,0 +leaf_mr_base(Kgc/KgC/d),0.00604,0.00604,0.00815,0.00778,0.00778,0.00869,0.00519,0.00869,0.00869,0.0098,0.0098 +froot_mr_base(Kgc/KgC/d),0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00819,0.00819 +livewood_mr_base(Kgc/KgC/d),0.00397,0.00397,0.00397,0.00371,0.00371,0.00436,0.00218,0.00312,0.001,0,0 diff --git a/mod17/data/MOD17_BPLUT_CX.X_MERRA_NASA.csv b/mod17/data/MOD17_BPLUT_CX.X_MERRA_NASA.csv new file mode 100644 index 0000000..24503a1 --- /dev/null +++ b/mod17/data/MOD17_BPLUT_CX.X_MERRA_NASA.csv @@ -0,0 +1,14 @@ +UMD_VEG_LC,ENF=0,EBF=1,DNF=2,DBF=3,MF=4,CShrub=5,OShrub=6,WSavannas=7,Savannas=8, Grass=9,Crop=10 +LUEmax(KgC/m^2/d/MJ),0.001211,0.001405,0.001227,0.001526,0.001226,0.001495,0.001027,0.001498,0.001454,0.001215,0.0013 +Tmin_min(C),-8,-8,-8,-6,-7,-8,-8,-8,-8,-8,-8 +Tmin_max(C),8.31,9.09,10.44,9.94,9.5,8.61,8.8,11.39,11.39,12.02,12.02 +VPD_min(Pa),650,1000,650,650,650,650,650,650,650,650,650 +VPD_max(Pa),3000,4000,3500,2900,2900,4300,4400,3500,3600,4200,4500 +SLA(LAI/KgC),15,26.9,16.9,24.7,22.6,9.4,12,28.8,28.9,38,38 +Q10_livewood(unitless),2,2,2,2,2,2,2,2,2,2,2 +Q10_froot(unitless),2,2,2,2,2,2,2,2,2,2,2 +froot_leaf_ratio,1.2,1.1,1.7,1.1,1.1,1,1.3,1.8,1.8,2.6,2 +livewood_leaf_ratio,0.182,0.162,0.165,0.203,0.203,0.079,0.04,0.091,0.051,0,0 +leaf_mr_base(Kgc/KgC/d),0.00604,0.00604,0.00815,0.00778,0.00778,0.00869,0.00519,0.00869,0.00869,0.0098,0.0098 +froot_mr_base(Kgc/KgC/d),0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00519,0.00819,0.00819 +livewood_mr_base(Kgc/KgC/d),0.00397,0.00397,0.00397,0.00371,0.00371,0.00436,0.00218,0.00312,0.001,0,0 diff --git a/mod17/data/MOD17_BPLUT_prior_20220525.json b/mod17/data/MOD17_BPLUT_prior_20220525.json new file mode 100644 index 0000000..6ec4e9e --- /dev/null +++ b/mod17/data/MOD17_BPLUT_prior_20220525.json @@ -0,0 +1,82 @@ +{ + "LUE_max": { + "mu": [ + null, 0.00098, 0.0014 , 0.00123, 0.00168, 0.00143, 0.0008 , + 0.00074, 0.00093, 0.00093, 0.00119, null, 0.00194 + ], + "sigma": [ + null, 0.00032, 0.0002 , 0.0002 , 0.00035, 0.00037, 0.00038, + 0.00021, 0.00037, 0.00038, 0.00045, null, 0.00055 + ] + }, + "SLA": { + "mu": [ + null, 2.25, 2.68, 2.26, 2.95, 2.97, 2.66, + 2.62, 2.78, 2.76, 2.82, null, 2.98 + ], + "sigma": [ + null, 0.74, 0.45, 0.73, 0.55, 0.42, 0.53, + 0.56, 0.62, 0.57, 0.51, null, 
0.50 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "mu": [ + null, 0.009, 0.009, 0.009, -0.013, -0.014, -0.084, + -0.084, 0.009, 0.009, 0.075, null, 0.009 + ], + "sigma": [ + null, 0.216, 0.216, 0.216, 0.067, 0.124, 0.156, + 0.156, 0.216, 0.216, 0.333, null, 0.216 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, + "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/MOD17_BPLUT_prior_20220611.json b/mod17/data/MOD17_BPLUT_prior_20220611.json new file mode 100644 index 0000000..6b24647 --- /dev/null +++ b/mod17/data/MOD17_BPLUT_prior_20220611.json @@ -0,0 +1,82 @@ +{ + "LUE_max": { + "mu": [ + null, 0.00098, 0.0014 , 0.00123, 0.00168, 0.00143, 0.0008 , + 0.00074, 0.00093, 0.00093, 0.00119, null, 0.00194 + ], + "sigma": [ + null, 0.00032, 0.0002 , 0.0002 , 0.00035, 0.00037, 0.00038, + 0.00021, 0.00037, 0.00038, 0.00045, null, 0.00055 + ] + }, + "SLA": { + "mu": [ + null, 2.25, 2.68, 2.26, 2.95, 2.97, 2.66, + 2.62, 2.78, 2.76, 2.82, null, 2.98 + ], + "sigma": [ + null, 0.74, 0.45, 0.73, 0.55, 0.42, 0.53, + 0.56, 0.62, 0.57, 0.51, null, 0.50 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "mu": [ + null, 1.2, 1.1, 1.7, 1.1, 1.1, 1.0, + 1.3, 1.8, 1.8, 2.6,null, 2.0 + ], + "sigma": [ + null, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, 0.5,null, 0.5 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, + "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/MOD17_BPLUT_prior_20220621.json 
b/mod17/data/MOD17_BPLUT_prior_20220621.json new file mode 100644 index 0000000..6d9a149 --- /dev/null +++ b/mod17/data/MOD17_BPLUT_prior_20220621.json @@ -0,0 +1,83 @@ +{ + "description": "Final prior for GPP and NPP models, where SLA is based on TRY data in C units and froot_leaf_ratio is fixed at C6.1 values", + "LUE_max": { + "mu": [ + null, 0.00098, 0.0014 , 0.00123, 0.00168, 0.00143, 0.0008 , + 0.00074, 0.00093, 0.00093, 0.00119, null, 0.00194 + ], + "sigma": [ + null, 0.00032, 0.0002 , 0.0002 , 0.00035, 0.00037, 0.00038, + 0.00021, 0.00037, 0.00038, 0.00045, null, 0.00055 + ] + }, + "SLA": { + "mu": [ + null, 3.03, 3.21, 3.77, 3.62, 3.56, 3.38, + 3.38, 3.60, 3.58, 3.59, null, 3.72 + ], + "sigma": [ + null, 0.958, 0.452, 0.204, 0.623, 0.580, 0.506, + 0.506, 0.696, 0.526, 0.542, null, 0.596 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "mu": [ + null, 1.2, 1.1, 1.7, 1.1, 1.1, 1.0, + 1.3, 1.8, 1.8, 2.6,null, 2.0 + ], + "sigma": [ + null, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, 0.5,null, 0.5 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, + "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/MOD17_BPLUT_prior_20221027.json b/mod17/data/MOD17_BPLUT_prior_20221027.json new file mode 100644 index 0000000..c5dd031 --- /dev/null +++ b/mod17/data/MOD17_BPLUT_prior_20221027.json @@ -0,0 +1,101 @@ +{ + "description": "Adjusted SLA for Shrubs; added Triangular priors for allometric ratios; SLA is based on TRY data in C units and froot_leaf_ratio is fixed at C6.1 values", + "LUE_max": { + "mu": [ + null, 0.00098, 0.0014 , 0.00123, 0.00168, 0.00143, 0.0008 , + 0.00074, 0.00093, 0.00093, 0.00119, null, 0.00194 + ], + "sigma": [ + null, 0.00032, 0.0002 , 0.0002 , 0.00035, 0.00037, 0.00038, + 0.00021, 0.00037, 0.00038, 0.00045, null, 0.00055 + ] + }, + "SLA": { + "mu": [ + null, 3.03, 3.21, 3.77, 3.62, 3.56, 3.21, + 3.21, 3.60, 3.58, 3.59, null, 3.72 + ], + "sigma": [ + null, 0.958, 0.452, 0.204, 0.623, 0.580, 0.310, + 0.310, 0.696, 0.526, 0.542, null, 0.596 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "c": [ + null, 1.2, 1.1, 1.7, 1.1, 
1.1, 1.0, + 1.3, 1.8, 1.8, 2.6,null, 2.0 + ], + "lower": [ + null, 0.8, 0.7, 1.3, 0.7, 0.7, 0.6, + 0.9, 1.4, 1.4, 2.2,null, 1.8 + ], + "upper": [ + null, 1.6, 1.5, 2.1, 1.5, 1.5, 1.4, + 1.7, 2.2, 2.2, 3.0,null, 2.4 + ] + }, + "livewood_leaf_ratio": { + "c": [ + null, 0.182, 0.162, 0.165, 0.203, 0.203, 0.079, + 0.040, 0.091, 0.051, 0.0 , null, 0.0 + ], + "lower": [ + null, 0.142, 0.122, 0.125, 0.163, 0.163, 0.039, + 0.001, 0.051, 0.011, 0.0 , null, 0.0 + ], + "upper": [ + null, 0.222, 0.202, 0.205, 0.243, 0.243, 0.199, + 0.080, 0.131, 0.091, 0.0 , null, 0.0 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, + "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/MOD17_BPLUT_prior_20221223.json b/mod17/data/MOD17_BPLUT_prior_20221223.json new file mode 100644 index 0000000..d331e7c --- /dev/null +++ b/mod17/data/MOD17_BPLUT_prior_20221223.json @@ -0,0 +1,101 @@ +{ + "description": "Fixed issue with SLA prior means, std. dev. propagated from TRY; Adjusted SLA for Shrubs; added Triangular priors for allometric ratios; SLA is based on TRY data in C units and froot_leaf_ratio is fixed at C6.1 values", + "LUE_max": { + "mu": [ + null, 0.00098, 0.0014 , 0.00123, 0.00168, 0.00143, 0.0008 , + 0.00074, 0.00093, 0.00093, 0.00119, null, 0.00194 + ], + "sigma": [ + null, 0.00032, 0.0002 , 0.0002 , 0.00035, 0.00037, 0.00038, + 0.00021, 0.00037, 0.00038, 0.00045, null, 0.00055 + ] + }, + "SLA": { + "mu": [ + null, 2.75, 3.21, 3.21, 3.61, 3.56, 3.21, + 3.21, 3.60, 3.58, 3.60, null, 3.72 + ], + "sigma": [ + null, 0.779, 0.452, 0.273, 0.626, 0.581, 0.310, + 0.310, 0.696, 0.526, 0.543, null, 0.599 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "c": [ + null, 1.2, 1.1, 1.7, 1.1, 1.1, 1.0, + 1.3, 1.8, 1.8, 2.6,null, 2.0 + ], + "lower": [ + null, 0.8, 0.7, 1.3, 0.7, 0.7, 0.6, + 0.9, 1.4, 1.4, 2.2,null, 1.8 + ], + "upper": [ + null, 1.6, 1.5, 2.1, 1.5, 1.5, 1.4, + 1.7, 2.2, 2.2, 3.0,null, 2.4 + ] + }, + "livewood_leaf_ratio": { + "c": [ + null, 0.182, 0.162, 0.165, 0.203, 0.203, 0.079, + 0.040, 0.091, 0.051, 0.0 , null, 0.0 + ], + "lower": [ + null, 0.142, 0.122, 0.125, 0.163, 0.163, 0.039, + 0.001, 0.051, 0.011, 0.0 , null, 0.0 + ], + "upper": [ + null, 0.222, 0.202, 0.205, 0.243, 0.243, 0.199, + 0.080, 0.131, 0.091, 0.0 , null, 0.0 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, 
+ "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/MOD17_calibration_config.json b/mod17/data/MOD17_calibration_config.json new file mode 100644 index 0000000..de3d510 --- /dev/null +++ b/mod17/data/MOD17_calibration_config.json @@ -0,0 +1,38 @@ +{ + "description": "Calibration against MERRA-2 1980-2000 synthetic year and using priors with: Fixed froot_leaf_ratio, SLA based on carbon units", + "BPLUT": { + "GPP": "/home/arthur.endsley/src/mod17/lib/python/mod17/data/MOD17_BPLUT_CX.X_MERRA_NASA.csv", + "NPP": "/home/arthur.endsley/DATA/20220503_MOD17_GPP_recal_BPLUT.csv" + }, + "data": { + "file": "/anx_lagr4/MODIS_VIIRS/calibration/VIIRS_MOD16_MOD17_tower_site_drivers_v9.h5", + "sites_blacklisted": [ + "CN-Do1", "CN-Do2", "CN-Do3", "IT-Vig", "NL-Hor", "US-ORv", "US-WPT" + ] + }, + "optimization": { + "backend_template": "/home/arthur.endsley/20221027_MOD17_%s_calibration_PFT%d.nc4", + "prior": "/usr/local/dev/mod17/lib/python/mod17/data/MOD17_BPLUT_prior_20221027.json", + "chains": 3, + "draws": 100000, + "tune": "scaling", + "scaling": 1e-3, + "objective": "RMSE", + "bounds": { + "LUE_max": [0, 0.005], + "tmin0": [-35, 0], + "tmin1": [0, 25], + "vpd0": [0, 1000], + "vpd1": [1000, 8000], + "SLA": [1, 200], + "Q10": [0, 10], + "Q10_froot": [0, 10], + "Q10_livewood": [0, 10], + "froot_leaf_ratio": [0, 4], + "livewood_leaf_ratio": [0.01, 1.00], + "leaf_mr_base": [0, 0.1], + "froot_mr_base": [0, 0.1], + "livewood_mr_base": [0, 0.1] + } + } +} diff --git a/mod17/data/VNP17_BPLUT_prior_20220706.json b/mod17/data/VNP17_BPLUT_prior_20220706.json new file mode 100644 index 0000000..18f1c22 --- /dev/null +++ b/mod17/data/VNP17_BPLUT_prior_20220706.json @@ -0,0 +1,121 @@ +{ + "description": "Initial prior for VNP17 GPP and NPP models, extending 20220621 prior, but informed by MOD17 calibration", + "LUE_max": { + "mu": [ + null, 0.00105, 0.00133, 0.00125, 0.00158, 0.00140, 0.00083, + 0.00075, 0.00096, 0.00106, 0.00126, null, 0.00181 + ], + "sigma": [ + null, 0.00027, 0.00017, 0.00019, 0.00030, 0.00031, 0.00035, + 0.00021, 0.00030, 0.00031, 0.00037, null, 0.00044 + ] + }, + "tmin1": { + "mu": [ + null, 11.2, 13.1, 11.5, 14.7, 12.8, 12.8, + 12.2, 12.7, 11.8, 11.9, null, 13.8 + ], + "sigma": [ + null, 7.00, 7.17, 6.89, 6.89, 7.01, 7.17, + 7.07, 7.04, 7.03, 7.08, null, 7.16 + ] + }, + "vpd1": { + "mu": [ + null, 3566, 3516, 2098, 3124, 2979, 3438, + 4193, 3521, 3897, 4004, null, 4273 + ], + "sigma": [ + null, 1190, 1094, 463, 949, 975, 1292, + 1629, 1268, 1353, 1533, null, 1529 + ] + }, + "SLA": { + "mu": [ + null, 2.75, 3.21, 3.21, 3.61, 3.56, 3.21, + 3.21, 3.60, 3.58, 3.60, null, 3.72 + ], + "sigma": [ + null, 0.779, 0.452, 0.273, 0.626, 0.581, 0.310, + 0.310, 0.696, 0.526, 0.543, null, 0.599 + ] + }, + "Q10_livewood": { + "mu": [ + null, 1.89, 1.84, 1.89, 1.84, 1.84, 1.84, + 1.84, 1.84, 1.84, 0.00, null, 0.00 + ], + "sigma": [ + null, 0.27, 0.22, 0.27, 0.22, 0.22, 0.22, + 0.22, 0.22, 0.22, 0.00, null, 0.00 + ] + }, + "Q10_froot": { + "mu": [ + null, 1.60, 1.60, 1.60, 1.60, 1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ], + "sigma": [ + null, 1.60, 1.60, 1.60, 1.60, 
1.60, 1.60, + 1.60, 1.60, 1.60, 1.60, null, 1.60 + ] + }, + "froot_leaf_ratio": { + "c": [ + null, 1.2, 1.1, 1.7, 1.1, 1.1, 1.0, + 1.3, 1.8, 1.8, 2.6,null, 2.0 + ], + "lower": [ + null, 0.8, 0.7, 1.3, 0.7, 0.7, 0.6, + 0.9, 1.4, 1.4, 2.2,null, 1.8 + ], + "upper": [ + null, 1.6, 1.5, 2.1, 1.5, 1.5, 1.4, + 1.7, 2.2, 2.2, 3.0,null, 2.4 + ] + }, + "livewood_leaf_ratio": { + "c": [ + null, 0.182, 0.162, 0.165, 0.203, 0.203, 0.079, + 0.040, 0.091, 0.051, 0.0 , null, 0.0 + ], + "lower": [ + null, 0.142, 0.122, 0.125, 0.163, 0.163, 0.039, + 0.001, 0.051, 0.011, 0.0 , null, 0.0 + ], + "upper": [ + null, 0.222, 0.202, 0.205, 0.243, 0.243, 0.199, + 0.080, 0.131, 0.091, 0.0 , null, 0.0 + ] + }, + "leaf_mr_base": { + "mu": [ + null, -4.65, -4.65, -4.35, -4.35, -4.56, -4.36, + -4.36, -4.36, -4.36, -4.03, null, -3.78 + ], + "sigma": [ + null, 0.668, 0.668, 0.546, 0.546, 0.614, 0.704, + 0.704, 0.704, 0.704, 0.536, null, 0.855 + ] + }, + "froot_mr_base": { + "mu": [ + null, -4.21, -4.21, -4.21, -4.21, -4.21, -4.21, + -4.21, -4.21, -4.21, -4.21, null, -4.21 + ], + "sigma": [ + null, 0.84, 0.84, 0.84, 0.84, 0.84, 0.84, + 0.84, 0.84, 0.84, 0.84, null, 0.84 + ] + }, + "livewood_mr_base": { + "mu": [ + null, -5.27, -5.27, -5.27, -5.27, -5.27, -5.27, + -5.27, -5.27, -5.27, 0.00, null, 0.00 + ], + "sigma": [ + null, 1.54, 1.54, 1.54, 1.54, 1.54, 1.54, + 1.54, 1.54, 1.54, 0.00, null, 0.00 + ] + } +} diff --git a/mod17/data/VNP17_calibration_config.json b/mod17/data/VNP17_calibration_config.json new file mode 100644 index 0000000..427dce5 --- /dev/null +++ b/mod17/data/VNP17_calibration_config.json @@ -0,0 +1,38 @@ +{ + "description": "Calibration against MERRA-2 1980-2000 climatology and using priors with: Fixed froot_leaf_ratio, SLA based on carbon units", + "BPLUT": { + "GPP": "/home/arthur.endsley/DATA/20220503_MOD17_GPP_recal_BPLUT.csv", + "NPP": "/home/arthur.endsley/DATA/20220707_VNP17_GPP_recal_BPLUT.csv" + }, + "data": { + "file": "/anx_lagr4/MODIS_VIIRS/calibration/VIIRS_MOD16_MOD17_tower_site_drivers_v9.h5", + "sites_blacklisted": [ + "CN-Do1", "CN-Do2", "CN-Do3", "IT-Vig", "NL-Hor", "US-ORv", "US-WPT" + ] + }, + "optimization": { + "backend_template": "/home/arthur.endsley/20221226_VNP17_%s_calibration_PFT%d.nc4", + "prior": "/home/arthur.endsley/src/mod17/lib/python/mod17/data/VNP17_BPLUT_prior_20220706.json", + "chains": 3, + "draws": 220000, + "tune": "scaling", + "scaling": 1e-3, + "objective": "RMSE", + "bounds": { + "LUE_max": [0, 0.005], + "tmin0": [-35, 0], + "tmin1": [0, 25], + "vpd0": [0, 1000], + "vpd1": [1000, 8000], + "SLA": [1, 200], + "Q10": [0, 10], + "Q10_froot": [0, 10], + "Q10_livewood": [0, 10], + "froot_leaf_ratio": [0, 4], + "livewood_leaf_ratio": [0.01, 1.00], + "leaf_mr_base": [0, 0.1], + "froot_mr_base": [0, 0.1], + "livewood_mr_base": [0, 0.1] + } + } +} diff --git a/mod17/science.py b/mod17/science.py new file mode 100644 index 0000000..598c78f --- /dev/null +++ b/mod17/science.py @@ -0,0 +1,202 @@ +import warnings +import numpy as np +from typing import Sequence + +def climatology365(series: Sequence, dates: Sequence) -> Sequence: + ''' + Computes a 365-day climatology for different locations from a time series + of length T. Ignores leap days. The climatology could then be indexed + using ordinals generated by `ordinals365()`. + + Parameters + ---------- + series : numpy.ndarray + T x ... 
array of data + dates : list or tuple + Sequence of datetime.datetime or datetime.date instances + + Returns + ------- + numpy.ndarray + ''' + # Get first and last day of the year (DOY) + ordinal = np.array([ + # Finally, subtract 1 from each day in a leap year after Leap Day + (doy - 1) if ((dates[i].year % 4 == 0) and doy >= 60) else doy + for i, doy in enumerate([ + # Next, fill in 0 wherever Leap Day occurs + 0 if (dates[i].year % 4 == 0 and doy == 60) else doy + for i, doy in enumerate([ + # First, convert datetime.datetime to ordinal day-of-year (DOY) + int(dt.strftime('%j')) for dt in dates + ]) + ]) + ]) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + return np.array([ + np.nanmean(series[ordinal == day,...], axis = 0) + for day in range(1, 366) + ]) + + +def daynight_partition(arr_24hr, updown, reducer = 'mean', missing = (0, 0)): + ''' + Partitions a 24-hour time series array into daytime and nighttime values, + then calculates the mean in each group. Daytime is defined as when the sun + is above the horizon; nighttime is the complement. + + NOTE: If the sun never rises, the "daytime" average is computed over 24 + hours, anyway. In these cases, "nighttime" average is identical to the + "daytime" average (i.e., 24 hours of night). If the sun is always above + the horizon (i.e., sun never sets) on a given day, the "daytime" average + is computed as expected (over 24 hours) and the nighttime average is + missing and is instead filled with the second element of the `missing` + argument's sequence. + + Parameters + ---------- + arr_24hr : numpy.ndarray + A size (24 x ...) array; the first axis must have 24 elements + corresponding to the measurement in each hour + updown: numpy.ndarray + A size (2 x ...) array, compatible with arr_24hr, where the first axis + has the hour of sunrise and sunset, in that order, for each element + reducer : str + One of "mean" or "sum" indicating whether an average or a total of the + daytime/ nighttime values should be calculated; e.g., for "mean", the + hourly values from daytime hours are added up and divided by the + length of the day (in hours). + missing : tuple + Values to use when the sun is always below or above the horizon for 24 + hours (i.e., never rises or never sets); first value is ignored (may + be used to fill missing daylight hours in some future version), and + the second value is used to fill in missing nighttime hours + (Default: `(0, 0)`). + + Returns + ------- + numpy.ndarray + A size (2 x ...) array where the first axis enumerates the daytime and + nighttime mean values, respectively + ''' + assert reducer in ('mean', 'sum'),\ + 'Argument "reducer" must be one of: "mean", "sum"' + # Prepare single-valued output array + arr_daytime = np.zeros(arr_24hr.shape[1:]) + arr_nighttime = arr_daytime.copy() + daylight_hrs = arr_daytime.copy().astype(np.int16) + # Do sunrise and sunset define an interval? (Sunset > Sunrise)? + inside_interval = np.apply_along_axis(lambda x: x[1] > x[0], 0, updown) + # Or is the sun never up? + never_up = np.logical_and(updown[0,...] == -1, updown[1,...] == -1) + # Iteratively sum daytime VPD and temperature values + for hr in range(0, 24): + # Given only hour of sunrise/set on a 24-hour clock... + # if sun rises and sets on same day: SUNRISE <= HOUR <= SUNSET; + # if sun sets on next day: either SUNRISE <= HOUR or HOUR <= SUNSET; + sun_is_up = np.logical_or( # Either... + np.logical_and(inside_interval, # ...Rises and sets same day + np.logical_and(updown[0,...] 
<= hr, hr <= updown[1,...])), + np.logical_and(~inside_interval, # ...Sets on next day + np.logical_or(updown[0,...] <= hr, hr <= updown[1,...]))) + # For simplicity, compute a 24-hour mean even if the sun never rises; + # there's no way to know what the "correct" daytime value is + mask = np.logical_or(never_up, sun_is_up) + np.add(np.where( + mask, arr_24hr[hr,...], 0), arr_daytime, out = arr_daytime) + np.add(np.where( + ~mask, arr_24hr[hr,...], 0), arr_nighttime, out = arr_nighttime) + # Keep track of the denominator (hours) for calculating the mean; + # note that this over-estimates actual daylight hours by 1 hour + # but results in the correct denominator for the sums above + np.add(np.where(mask, 1, 0), daylight_hrs, out = daylight_hrs) + arr_24hr = None + # Calculate mean quantities + if reducer == 'mean': + arr_daytime = np.divide(arr_daytime, daylight_hrs) + arr_nighttime = np.divide(arr_nighttime, 24 - daylight_hrs) + # For sites where the sun is always above/ below the horizon, set missing + # nighttime values to zero + arr_nighttime[~np.isfinite(arr_nighttime)] = missing[1] + return np.stack((arr_daytime, arr_nighttime)) + + +def degree_lengths(phi, a = 6378137, b = 6356752.3142): + ''' + Returns the approximate length of degrees of latitude and longitude. + Source: + + https://en.wikipedia.org/wiki/Latitude + + phi : Number + Latitude, in degrees + a : Number + Radius of the Earth (major axis) in meters + b : Number + Length of minor axis of the Earth in meters + + Returns + ------- + tuple + Length of a degree of (longitude, latitude), respectively + ''' + e2 = ((a**2) - (b**2)) / (a**2) + # Approximate length of a degree of latitude + lat_m = 111132.954 - (559.822 * np.cos(2 * np.deg2rad(phi))) +\ + (1.175 * np.cos(4 * np.deg2rad(phi))) + lng_m = (np.pi * a * np.cos(np.deg2rad(phi))) / ( + 180 * np.sqrt(1 - (e2 * np.sin(np.deg2rad(phi))**2))) + return (lng_m, lat_m) + + +def nash_sutcliffe( + predicted: Sequence, observed: Sequence, norm: bool = False) -> float: + ''' + Computes the Nash-Sutcliffe efficiency of a model's predictions. + + $$ + \mathrm{NSE} = 1 - + \frac{\sum_i (y_i - \hat{y}_i)^2}{\sum_i (y_i - \bar{y}_i)^2} + $$ + + Parameters + ---------- + predicted : Sequence + Predicted values + observed : Sequence + Observed values + norm : bool + True to return the normalized Nash-Sutcliffe efficiency + (Default: False) + + Returns + ------- + float + ''' + nse = 1 - (np.nansum(np.power(observed - predicted, 2)) /\ + np.nansum(np.power(observed - np.nanmean(observed), 2))) + if norm: + return 1 / (2 - nse) # Return normalized NSE + return nse + + + +def ordinals365(dates: Sequence) -> Sequence: + ''' + Returns a length-T sequence of ordinals on [1,365]. Can be used for + indexing a 365-day climatology; see `climatology365()`. + + Parameters + ---------- + dates : list or tuple + Sequence of datetime.datetime or datetime.date instances + + Returns + ------- + list + ''' + return [ + t - 1 if (year % 4 == 0 and t >= 60) else t + for t, year in [(int(t.strftime('%j')), t.year) for t in dates] + ] diff --git a/mod17/sensitivity.py b/mod17/sensitivity.py new file mode 100644 index 0000000..6abbbf3 --- /dev/null +++ b/mod17/sensitivity.py @@ -0,0 +1,328 @@ +''' +Performs the Sobol' sensitivity analysis for the MOD17 GPP and NPP models. + +It's not clear that treating driver data as parameters is appropriate, here, +given that the NSE is calculated based on observed fluxes. 
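+
+Because `main()` is wrapped with `fire.Fire()`, the analysis can be run from
+the command line, e.g. (assuming the configuration file is in the working
+directory):
+
+    python sensitivity.py gpp MOD17_calibration_config.json --pft=1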
+''' + +import json +import os +import warnings +import numpy as np +import h5py +import mod17 +from tqdm import tqdm +from mod17 import MOD17 +from mod17.utils import restore_bplut, pft_dominant +from mod17.science import nash_sutcliffe +from SALib.sample import saltelli +from SALib.analyze import sobol + +OUTPUT_TPL = '/home/arthur.endsley/MOD17_sensitivity_%s_analysis.json' + + +def main(model, config_file, pft = None): + ''' + Conducts the Sobol' sensitivity analysis for the GPP, NPP models. + + Parameters + ---------- + model : str + pft : int + ''' + with open(config_file, 'r') as file: + config = json.load(file) + if model.lower() == 'gpp': + saltelli_gpp(config, pft) + elif model.lower() == 'npp': + saltelli_npp(config, pft) + elif model.lower() == 'npp2': + saltelli_npp_and_gpp(config, pft) + else: + raise ValueError('Did not recognize model "%s"' % model) + + +def saltelli_gpp(config, pft = None): + ''' + Sensitivity analysis for the GPP model. + + Parameters + ---------- + pft : int + ''' + drivers, gpp = load_gpp_data(config, None, pft, validation_mask_only = True) + params = MOD17.required_parameters[0:5] + problem = { + 'num_vars': len(params), + 'names': params, + 'bounds': [ + config['optimization']['bounds'][p] + for p in params + ] + } + # NOTE: Number of samples must be a power of 2 + param_sweep = saltelli.sample(problem, 256) + Y = np.zeros([param_sweep.shape[0]]) + for i, X in enumerate(tqdm(param_sweep)): + yhat = MOD17._gpp(X, *drivers[0:4]) + Y[i] = nash_sutcliffe(yhat, gpp, norm = True) + metrics = sobol.analyze(problem, Y) + name = 'GPP' if pft is None else 'GPP-PFT%d' % pft + with open(OUTPUT_TPL % name, 'w') as file: + json.dump(dict([(k, v.tolist()) for k, v in metrics.items()]), file) + + +def saltelli_npp(config, pft = None): + ''' + Sensitivity analysis for the NPP model. 
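+
+    The function follows the usual SALib sample/analyze workflow; a minimal,
+    self-contained sketch of that pattern (a toy model, not the NPP model
+    itself) is:
+
+        import numpy as np
+        from SALib.sample import saltelli
+        from SALib.analyze import sobol
+
+        problem = {
+            'num_vars': 2, 'names': ['a', 'b'], 'bounds': [[0, 1], [0, 1]]}
+        X = saltelli.sample(problem, 256)  # N * (2D + 2) = 1536 rows
+        Y = (X ** 2).sum(axis = 1)         # evaluate the (toy) model
+        Si = sobol.analyze(problem, Y)     # Si['S1'], Si['ST'], ...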
+ + Parameters + ---------- + pft : int + ''' + # Now for NPP; we have to load the BPLUT with the static parameters + bplut = restore_bplut(config['BPLUT']['NPP']) + gpp_params = [ + np.nanmean(bplut[p]) for p in MOD17.required_parameters[0:5] + ] + drivers, npp = load_npp_data(config, None, pft) + params = MOD17.required_parameters[5:] + if pft in (10, 12): + params = list(set(params).difference( + ('Q10_livewood', 'livewood_leaf_ratio', 'livewood_mr_base'))) + problem = { + 'num_vars': len(params), + 'names': params, + 'bounds': [ + config['optimization']['bounds'][p] for p in params + ] + } + # NOTE: Number of samples must be a power of 2 + samples = 4096 if pft == 3 else 1024 + samples = 2048 if pft in (10, 12) else samples + param_sweep = saltelli.sample(problem, samples) + Y = np.zeros([param_sweep.shape[0]]) + # Index the livewood parameters + idx = np.array([ + MOD17.required_parameters[5:].index(p) + for p in ('Q10_livewood', 'livewood_leaf_ratio', 'livewood_mr_base') + ]) + # There is no generalizable way to index and insert the livewood + # parameters because one of them comes at the end of the array and + # np.insert() will not work in that case; so, let's assert we have the + # right indices and make this part hard-coded + assert (idx == [1,4,7]).all() + for i, X in enumerate(tqdm(param_sweep)): + if pft in (10, 12): + # Even more confusingly, have to increment each successive index + # because of the progressive insertion + X0 = X.copy() + for j in idx: + X = np.insert(X, j, 0) + yhat = MOD17._npp(np.array([*gpp_params, *X]), *drivers) + Y[i] = nash_sutcliffe(yhat, npp, norm = True) + metrics = sobol.analyze(problem, Y) + name = 'NPP' if pft is None else 'NPP-PFT%d' % pft + with open(OUTPUT_TPL % name, 'w') as file: + if pft in (10, 12): + output = dict() + for key, value in metrics.items(): + for j in idx: + value = np.insert(value, j, 0) + output[key] = value.tolist() + else: + output = dict([(k, v.tolist()) for k, v in metrics.items()]) + json.dump(output, file) + + +def saltelli_npp_and_gpp(config, pft = None): + ''' + Sensitivity analysis for the NPP model, including the GPP parameters + in the analysis. 
+ + Parameters + ---------- + pft : int + ''' + # Now for NPP; we have to load the BPLUT with the static parameters + bplut = restore_bplut(config['BPLUT']['NPP']) + print('Loading data...') + drivers, npp = load_npp_data(config, None, pft) + print('Setting up experiments...') + params = MOD17.required_parameters + if pft in (10, 12): + params = list(set(params).difference( + ('Q10_livewood', 'livewood_leaf_ratio', 'livewood_mr_base'))) + problem = { + 'num_vars': len(params), + 'names': params, + 'bounds': [ + config['optimization']['bounds'][p] for p in params + ] + } + # NOTE: Number of samples must be a power of 2 + samples = 2048 + if pft is not None: + samples = 4096 if pft == 3 else 1024 + samples = 2048 if pft in (10, 12) else samples + param_sweep = saltelli.sample(problem, samples) + Y = np.zeros([param_sweep.shape[0]]) + # Index the livewood parameters + idx = np.array([ + MOD17.required_parameters[5:].index(p) + for p in ('Q10_livewood', 'livewood_leaf_ratio', 'livewood_mr_base') + ]) + # There is no generalizable way to index and insert the livewood + # parameters because one of them comes at the end of the array and + # np.insert() will not work in that case; so, let's assert we have the + # right indices and make this part hard-coded + assert (idx == [1,4,7]).all() + for i, X in enumerate(tqdm(param_sweep)): + if pft in (10, 12): + # Even more confusingly, have to increment each successive index + # because of the progressive insertion + X0 = X.copy() + for j in idx: + X = np.insert(X, j, 0) + yhat = MOD17._npp(X, *drivers) + Y[i] = nash_sutcliffe(yhat, npp, norm = True) + metrics = sobol.analyze(problem, Y) + name = 'NPP' if pft is None else 'NPP-PFT%d' % pft + with open(OUTPUT_TPL % name, 'w') as file: + if pft in (10, 12): + output = dict() + for key, value in metrics.items(): + for j in idx: + value = np.insert(value, j, 0) + output[key] = value.tolist() + else: + output = dict([(k, v.tolist()) for k, v in metrics.items()]) + json.dump(output, file) + + +def load_gpp_data( + config, filename = None, pft = None, subgrid = False, verbose = True, + validation_mask_only = False): + ''' + Loads the data required for running a sensitivity analysis on the + MOD17 GPP model. 
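+
+    For example (a sketch; `config` must point to an existing driver file):
+
+        drivers, gpp, mask = load_gpp_data(config, pft = 1)
+        # drivers: [fPAR, Tmin, VPD, PAR], each masked to valid GPP obs.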
+ + Parameters + ---------- + config : dict + filename : str or None + pft : int + subgrid : bool + verbose : bool + validation_mask_only : bool + + Returns + ------- + tuple + A 3-element tuple: A sequence of driver datasets (first element) and + the array of valid GPP observations (second element), and the mask + ''' + if verbose: + print('Loading GPP datasets...') + if filename is None: + filename = config['data']['file'] + with h5py.File(filename, 'r') as hdf: + sites = hdf['FLUXNET/site_id'][:].tolist() + if hasattr(sites[0], 'decode'): + sites = [s.decode('utf-8') for s in sites] + # NOTE: Converting from Kelvin to Celsius + tday = hdf['MERRA2/T10M_daytime'][:] - 273.15 + qv10m = hdf['MERRA2/QV10M_daytime'][:] + ps = hdf['MERRA2/PS_daytime'][:] + drivers = [ # fPAR, Tmin, VPD, PAR, LAI, Tmean, years + hdf['MODIS/MOD15A2HGF_fPAR_interp'][:], + hdf['MERRA2/Tmin'][:] - 273.15, + MOD17.vpd(qv10m, ps, tday), + MOD17.par(hdf['MERRA2/SWGDN'][:]), + ] + observed_gpp = hdf['FLUXNET/GPP'][:] + is_test = hdf['FLUXNET/validation_mask'][:].sum(axis = 0).astype(bool) + if pft is not None: + blacklist = config['data']['sites_blacklisted'] + pft_map = pft_dominant(hdf['state/PFT'][:], site_list = sites) + pft_mask = np.logical_and(pft_map == pft, ~np.in1d(sites, blacklist)) + drivers = [d[:,pft_mask] for d in drivers] + observed_gpp = observed_gpp[:,pft_mask] + # Set negative VPD to zero + drivers[2] = np.where(drivers[2] < 0, 0, drivers[2]) + # Convert fPAR from (%) to [0,1] + if subgrid: + drivers[0] = drivers[0] * 0.01 + else: + # Average over the subgrid + drivers[0] = np.nanmean(drivers[0], axis = -1) * 0.01 + # Speed things up by focusing only on data points where valid data exist + mask = ~np.isnan(observed_gpp) + if validation_mask_only: + # Stratify the data using the validation mask so that an equal number + # of samples from each PFT are used + mask = np.logical_and(is_test, mask) + drivers = [d[mask] for d in drivers] + return (drivers, observed_gpp[mask], mask) + + +def load_npp_data( + config, filename = None, pft = None, subgrid = False, verbose = True): + ''' + Loads the data required for running a sensitivity analysis on the + MOD17 NPP model. 
+ + Parameters + ---------- + config : dict + filename : str + pft : int + subgrid : bool + verbose : bool + + Returns + ------- + tuple + A 2-element tuple: A sequence of driver datasets (first element) and + the array of valid NPP observations (second element) + ''' + if verbose: + print('Loading NPP datasets...') + if filename is None: + filename = config['data']['file'] + with h5py.File(filename, 'r') as hdf: + sites = hdf['NPP/site_id'][:].tolist() + if hasattr(sites[0], 'decode'): + sites = [s.decode('utf-8') for s in sites] + # NOTE: Converting from Kelvin to Celsius + tday = hdf['NPP/surface_met_MERRA2/T10M_daytime'][:] - 273.15 + qv10m = hdf['NPP/surface_met_MERRA2/QV10M_daytime'][:] + ps = hdf['NPP/surface_met_MERRA2/PS_daytime'][:] + drivers = [ # fPAR, Tmin, VPD, PAR, LAI, Tmean, years + hdf['NPP/MOD15A2H_fPAR_clim_filt'][:], + hdf['NPP/surface_met_MERRA2/Tmin'][:] - 273.15, + MOD17.vpd(qv10m, ps, tday), + MOD17.par(hdf['NPP/surface_met_MERRA2/SWGDN'][:]), + hdf['NPP/MOD15A2H_LAI_clim_filt'][:], + hdf['NPP/surface_met_MERRA2/T10M'][:] - 273.15, + np.full(ps.shape, 1) # i.e., A 365-day climatological year ("Year 1") + ] + observed_npp = hdf['NPP/NPP_total'][:] + if pft is not None: + blacklist = config['data']['sites_blacklisted'] + pft_map = hdf['NPP/PFT'][:] + drivers = [d[:,pft_map == pft] for d in drivers] + observed_npp = observed_npp[pft_map == pft] + # Set negative VPD to zero + drivers[2] = np.where(drivers[2] < 0, 0, drivers[2]) + # Convert fPAR from (%) to [0,1] and re-scale LAI; reshape fPAR and LAI + drivers[0] = np.nanmean(drivers[0], axis = -1) * 0.01 + drivers[4] = np.nanmean(drivers[4], axis = -1) * 0.1 + return (drivers, observed_npp) + + +if __name__ == '__main__': + import fire + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + fire.Fire(main) diff --git a/mod17/srs.py b/mod17/srs.py new file mode 100644 index 0000000..7285dcb --- /dev/null +++ b/mod17/srs.py @@ -0,0 +1,215 @@ +''' +MODIS sinusoidal projection forward and backward coordinate transformations, +courtesy of Giglio et al. (2018), Collection 6 MODIS Burned Area Product +User's Guide, Version 1, Appendix B: + + https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.2.pdf +''' + +import h5py +import re +import numpy as np +from numbers import Number +from typing import Sequence, Tuple, Iterable + +SPHERE_RADIUS = 6371007.181 # Radius of ideal sphere, meters +TILE_LINES = {250: 5000, 500: 2400, 1000: 1200} # Num. lines by nominal res. +TILE_SIZE = 1111950 # Width and height of MODIS tile in projection plane +XMIN = -20015109 # Western limit of projection plane, meters +YMAX = 10007555 # Nothern limit of projection plane, meters +VIIRS_METADATA = re.compile( + r'.*XDim=(?P\d+)' + r'.*YDim=(?P\d+)' + r'.*UpperLeftPointMtrs=\((?P
    [0-9,\-\.]+)\)' + r'.*LowerRightMtrs=\((?P[0-9,\-\.]+)\).*', re.DOTALL) +# Taken from a MOD15A2H EOS-HDF file; this seems to fit best +MODIS_SINUSOIDAL_PROJ_WKT = ''' +PROJCS["unnamed",GEOGCS["Unknown datum based upon the custom spheroid", + DATUM["Not specified (based on custom spheroid)", + SPHEROID["Custom spheroid",6371007.181,0]],PRIMEM["Greenwich",0], + UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]], + PROJECTION["Sinusoidal"], + PARAMETER["longitude_of_center",0], + PARAMETER["false_easting",0], + PARAMETER["false_northing",0], + UNIT["Meter",1],AXIS["Easting",EAST], + AXIS["Northing",NORTH]] +'''.replace('\n', '') + + +def geotransform( + hdf: h5py.File, + ps: float = 463.31271653, + nrow: int = 2400, + ncol: int = 2400, + metadata = VIIRS_METADATA + ) -> Iterable[Tuple[Number, Number, Number, Number, Number, Number]]: + ''' + Prescribe a geotransform tuple for the output GeoTIFF. For MODIS/VIIRS + sinsuoidal projections, the lower right corner coordinates are "the only + metadata that accurately reflect the extreme corners of the gridded image" + (Myneni et al. 2018, VIIRS LAI/fPAR User Guide). So, instead of using the + reported upper-left (UL) corner coordinates, it is more accurate to use + the lower-right (LR) corner coordinates and calculate the position of the + UL corner based on the width and height of the image and the pixel size. + NOTE that a rather odd pixel size is required to get the correct results + verified against the HDF-EOS-to-GeoTIFF (HEG) Conversion Tool; see also + Giglio et al. (2018), "Collection 6 MODIS Burned Area Product User's + Guide, Version 1" Table 1. + + https://modis-land.gsfc.nasa.gov/pdf/MODIS_C6_BA_User_Guide_1.2.pdf + + Parameters + ---------- + hdf : h5py.File + ps : int or float + The pixel size; in units matching the linear units of the SRS + (Default: 463.3127 meters) + nrow : int + Number of rows in the output image (Default: 2400 for MODIS/VIIRS) + ncol : int + Number of columns in the output image (Default: 2400 for MODIS/VIIRS) + metadata : re.Pattern + Compiled regex that captures important metadata fields + + Returns + ------- + tuple + (x_min, pixel_width, 0, y_max, 0, -pixel_height) + ''' + meta = hdf['HDFEOS INFORMATION/StructMetadata.0'][()].decode() + lr = VIIRS_METADATA.match(meta).groupdict()['lr'].split(',') + return ( # Subtract distance (meters) from LR corner to obtain UR corner + float(lr[0]) - (ncol * ps), ps, 0, float(lr[1]) + (nrow * ps), 0, -ps) + + +def modis_from_wgs84(coords: Sequence) -> Sequence: + ''' + Given longitude-latitude coordinates, return the coordinates on the + sinusoidal projection plane. + + Parameters + ---------- + coords : tuple or list or numpy.ndarray + (Longitude, Latitude) coordinate pair; for a numpy.ndarray, assumes + that this is described by the first axis, which is length 2 + + Returns + ------- + numpy.ndarray + (X, Y) coordinate pair in MODIS sinusoidal projection; a (2 x ...) array + ''' + x, y = np.deg2rad(coords) + return np.stack((SPHERE_RADIUS * x * np.cos(y), SPHERE_RADIUS * y)) + + +def modis_to_wgs84(coords: Sequence) -> Sequence: + ''' + Convert coordinates on the MODIS sinusoidal plane to longitude-latitude + coordinates (WGS84). + + Parameters + ---------- + coords : tuple or list or numpy.ndarray + (X, Y) coordinate pair in MODIS sinusoidal projection; for a + numpy.ndarray, assumes that this is described by the first axis, + which is length 2 + + Returns + ------- + numpy.ndarray + (Longitude, Latitude) coordinate pair; a (2 x ...) 
array + ''' + x, y = coords + lat = y / SPHERE_RADIUS # i.e., return value in radians + lng = x / (SPHERE_RADIUS * np.cos(lat)) + return np.stack((np.rad2deg(lng), np.rad2deg(lat))) + + +def modis_tile_from_wgs84(coords: Sequence) -> Sequence: + ''' + Given longitude-latitude coordinates, return the MODIS tile (H,V) that + contains them. + + Parameters + ---------- + coords : tuple or list or numpy.ndarray + (Longitude, Latitude) coordinate pair; for a numpy.ndarray, assumes + that this is described by the first axis, which is length 2 + + Returns + ------- + numpy.ndarray + (H,V) tile identifier; a (2 x ...) array + ''' + x, y = modis_from_wgs84(coords) # Get coordinates in the projection plane + return np.stack(( + np.floor((x - XMIN) / TILE_SIZE), + np.floor((YMAX - y) / TILE_SIZE))) + + +def modis_row_col_from_wgs84( + coords: Sequence, nominal: int = 500) -> Sequence: + ''' + Given longitude-latitude coordinates, return the corresponding row-column + coordinates within a MODIS tile. NOTE: You'll need to determine which + MODIS tile contains this row-column index with `modis_tile_from_wgs84()`. + + Parameters + ---------- + coords : tuple or list or numpy.ndarray + (Longitude, Latitude) coordinate pair in WGS84 projection + nominal : int + Nominal resolution of MODIS raster: 250 (meters), 500, or 1000 + + Returns + ------- + numpy.ndarray + (Row, Column) coordinates; a (2 x ...) array + ''' + x, y = modis_from_wgs84(coords) # Get coordinates in the projection plane + # Get actual size of, e.g., "500-m" MODIS sinusoidal cell + res = TILE_SIZE / float(TILE_LINES[nominal]) + out = np.stack(( + np.floor((((YMAX - y) % TILE_SIZE) / res) - 0.5), + np.floor((((x - XMIN) % TILE_SIZE) / res) - 0.5), + )) + # Fix some edge cases where subtracting 0.5, taking the floor leads to -1 + return np.where(out == -1, 0, out) + + +def modis_row_col_to_wgs84( + coords: Sequence, h: Number, v: Number, nominal: int = 500 + ) -> Sequence: + ''' + Convert pixel coordinates in a specific MODIS tile to longitude-latitude + coordinates. The "h" and "v" arguments if passed as arrays, must be + conformable to the "coords" array. + + Parameters + ---------- + coords : tuple or list or numpy.ndarray + (X, Y) coordinate pair in MODIS sinusoidal projection + h : int or numpy.ndarray + MODIS tile "h" index + v : int or numpy.ndarray + MODIS tile "v" index + nominal : int + Nominal resolution of MODIS raster: 250 (meters), 500, or 1000 + + Returns + ------- + numpy.ndarray + (Longitude, Latitude) coordinates; a (2 x ...) array + ''' + r, c = coords + lines = TILE_LINES[nominal] + assert np.logical_and( + np.logical_and(0 <= r, r <= (lines - 1)), + np.logical_and(0 <= c, c <= (lines - 1))).all(),\ + 'Row and column indices must be in the range [0, %d]' % (lines - 1) + # Get actual size of, e.g., "500-m" MODIS sinusoidal cell + res = TILE_SIZE / float(TILE_LINES[nominal]) + x = ((c + 0.5) * res) + (h * TILE_SIZE) + XMIN + y = YMAX - ((r + 0.5) * res) - (v * TILE_SIZE) + return modis_to_wgs84((x, y)) diff --git a/mod17/utils.py b/mod17/utils.py new file mode 100644 index 0000000..d8e701d --- /dev/null +++ b/mod17/utils.py @@ -0,0 +1,542 @@ +''' +Utilities related to the MOD17 algorithm. 
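+
+Includes helpers for reading and writing the BPLUT, unpacking MODIS QC bit
+flags, great-circle (haversine) distances, and mapping tower sites to their
+dominant PFT class. A small, self-contained illustration of the bit
+unpacking (not specific to MOD17):
+
+    import numpy as np
+    from mod17.utils import dec2bin_unpack
+
+    bits = dec2bin_unpack(np.array([129], dtype = np.uint8))
+    # bits -> array([[1, 0, 0, 0, 0, 0, 0, 1]], dtype=uint8)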
+''' + +import csv +import os +import numpy as np +import pandas as pd +import mod17 +from collections import Counter +from typing import Callable, Sequence, Union, BinaryIO +from pandas._typing import FilePath, ReadCsvBuffer + +BPLUT_FIELD_LOOKUP = { + 'LUEmax(KgC/m^2/d/MJ)': 'LUE_max', + 'Tmin_min(C)': 'tmin0', + 'Tmin_max(C)': 'tmin1', + 'VPD_min(Pa)': 'vpd0', + 'VPD_max(Pa)': 'vpd1', + 'SLA(LAI/KgC)': 'SLA', + 'Q10(unitless)': 'Q10', + 'Q10_livewood(unitless)': 'Q10_livewood', + 'Q10_froot(unitless)': 'Q10_froot', + 'froot_leaf_ratio': 'froot_leaf_ratio', + 'livewood_leaf_ratio': 'livewood_leaf_ratio', + 'leaf_mr_base(Kgc/KgC/d)': 'leaf_mr_base', + 'froot_mr_base(Kgc/KgC/d)': 'froot_mr_base', + 'livewood_mr_base(Kgc/KgC/d)': 'livewood_mr_base', +} + +def dec2bin_unpack(x: np.ndarray) -> np.ndarray: + ''' + Unpacks an arbitrary decimal NumPy array into a binary representation + along a new axis. Assumes decimal digits are on the interval [0, 255], + i.e., that only 8-bit representations are needed. + + Parameters + ---------- + x : numpy.ndarray + + Returns + ------- + numpy.ndarray + ''' + # Make sure the bit representation is enumerated along a new axis, the + # very last axis + axis = x.ndim + # unpackbits() returns the bit representation in big-endian order, so we + # flip the array (with -8) to get litte-endian order + return np.unpackbits(x[...,None], axis = axis)[...,-8:] + + +def haversine(p1: Sequence, p2: Sequence, radius: float = 6371e3) -> float: + ''' + Haversine formula for great circle distance, in meters. Accurate for + points separated near and far but for small distances the accuracy is + improved by providing a different radius of the sphere, say 6356.7523 km + for polar regions or 6378.1370 km for equatorial regions. Default is the + mean earth radius. + + NOTE: Distance returned is in the same units as radius. + + Parameters + ---------- + p1 : tuple or list + Sequence of two floats, longitude and latitude, respectively + p2 : tuple or list + Same as p1 but for the second point + radius : int or float + Radius of the sphere to use in distance calculation + (Default: 6,371,000 meters) + + Returns + ------- + float + ''' + x1, y1 = map(np.deg2rad, p1) + x2, y2 = map(np.deg2rad, p2) + dphi = np.abs(y2 - y1) # Difference in latitude + dlambda = np.abs(x2 - x1) # Difference in longitude + angle = 2 * np.arcsin(np.sqrt(np.add( + np.power(np.sin(dphi / 2), 2), + np.cos(y1) * np.cos(y2) * np.power(np.sin(dlambda / 2), 2) + ))) + return float(angle * radius) + + +def mod15a2h_qc_fail(x: np.ndarray) -> np.ndarray: + ''' + Returns pass/fail for QC flags based on the L4C fPAR QC protocol for the + `FparLai_QC` band: Bad pixels have either `1` in the first bit ("Pixel not + produced at all") or anything other than `00` ("clear") in bits 3-4. + Output array is True wherever the array fails QC criteria. 
Compare to: + + np.vectorize(lambda v: v[0] == 1 or v[3:5] != '00') + + Parameters + ---------- + x : numpy.ndarray + Array where the last axis enumerates the unpacked bits + (ones and zeros) + + Returns + ------- + numpy.ndarray + Boolean array with True wherever QC criteria are failed + ''' + y = dec2bin_unpack(x) + # Emit 1 = FAIL if these two bits are not == "00" + c1 = y[...,3:5].sum(axis = -1).astype(np.uint8) + # Emit 1 = FAIL if 1st bit == 1 ("Pixel not produced at all") + c2 = y[...,0] + # Intermediate arrays are 1 = FAIL, 0 = PASS + return (c1 + c2) > 0 + + +def pft_dominant( + pft_map: np.ndarray, site_list: list = None, + valid_pft: list = mod17.PFT_VALID): + ''' + Returns the dominant PFT type, based on the PFT mode among the sub-grid + for each site. Note that this is specific to the MOD17 calibration/ + validation (Cal/Val) protocol, i.e., two sites are always classified as + Deciduous Needleleaf (PFT 3): + + CA-SF2 + CA-SF3 + + Three other sites (CN-Do1, CN-Do3, US-WPT) are classified by the PI as + wetlands, which is not supported, nor are their MCD12Q1 classifications. + + Three other sites have hard-coded PFT classes because they are in urban + areas (PFT 13): + + IT-Vig: Re-classified to PFT 4 (as reported by PI) + NL-Hor: Re-classified to PFT 10 (as reported by PI) + SE-Abi: Re-classified to PFT 4 (as reported by PI) + + At US-ORv, it's clearly a wetland but there is a lot of closed canopy of + indeterminate composition. + + Parameters + ---------- + pft_map : numpy.ndarray + (N x M) array of PFT classes, where N is the number of sites and + M is the number of sub-grid cells (N PFT classes are returned) + site_list : list + (Optional) List of the site names; must be provided to get PFT + classes that accurately match the Cal/Val protocol + valid_pft : list + (Optional) List of valid PFT classes (Default: `mod17.PFT_VALID`) + + Returns + ------- + numpy.ndarray + An (N,) array of the dominant PFT classes + ''' + pft_dom = np.zeros(pft_map.shape[0], dtype = np.float32) + for i in range(0, pft_map.shape[0]): + try: + pft_dom[i] = Counter( + list(filter(lambda x: x in valid_pft, pft_map[i])))\ + .most_common()[0][0] + except: + # Skip those sites that have no valid PFTs + continue + if site_list is not None: + # Fill in the PI-reported PFT for troublesome sites + pft_dom[197] = 4 + pft_dom[209] = 10 + pft_dom[234] = 4 + # Fill in black-listed sites + idx = [ + site_list.index(sid) + for sid in ('CN-Do1', 'CN-Do3', 'US-WPT', 'US-ORv') + ] + pft_dom[idx] = np.nan + # For PFT==3 (DNF) use pre-determined locations + idx = [site_list.index(sid) for sid in ('CA-SF2', 'CA-SF3')] + pft_dom[idx] = 3 + # For PFT==6 (CSH) use sites with any amount of CSH pixels + idx = np.argwhere( + np.sum(pft_map == 6, axis = 1) > 0).ravel() + pft_dom[idx] = 6 + return pft_dom + + +def pft_remap( + pft_map: np.ndarray, site_list: list = None, + valid_pft: list = mod17.PFT_VALID): + ''' + Returns a map of PFTs that is consistent with the model's approved PFT + classes. 
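+    For example (a minimal sketch that ignores the site-specific overrides
+    described below):
+
+        pft_remap(np.array([[1, 11, 13], [4, 4, 7]]))
+        # -> [[1, 0, 0], [4, 4, 7]]; classes 11 and 13 are zeroed out
+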
Note that this is specific to the MOD17 calibration/ + validation (Cal/Val) protocol, i.e., two sites are always classified as + Deciduous Needleleaf (PFT 3): + + CA-SF2 + CA-SF3 + + Three other sites have hard-coded PFT classes because they are in urban + areas (PFT 13): + + IT-Vig: Re-classified to PFT 4 (as reported by PI) + NL-Hor: Re-classified to PFT 10 (as reported by PI) + SE-Abi: Re-classified to PFT 4 (as reported by PI) + + PFT classes that are not recognized in the MOD17 model are mapped to 0, + which is not used in the model. + + Parameters + ---------- + pft_map : numpy.ndarray + (N x M) array of PFT classes, where N is the number of sites and + M is the number of sub-grid cells (N PFT classes are returned) + site_list : list + (Optional) List of the site names; must be provided to get PFT + classes that accurately match the Cal/Val protocol + valid_pft : list + (Optional) List of valid PFT classes (Default: `mod17.PFT_VALID`) + + Returns + ------- + numpy.ndarray + An (N,M) array of the model-consistent PFT classes + ''' + output_map = pft_map.copy() + if site_list is not None: + # Fill in the PI-reported PFT for troublesome sites + output_map[197] = 4 + output_map[209] = 10 + output_map[234] = 4 + # For PFT==3, DNF, use pre-determined locations + idx = [site_list.index(sid) for sid in ('CA-SF2', 'CA-SF3')] + output_map[idx] = 3 + output_map[output_map > 12] = 0 + output_map[output_map == 11] = 0 + return output_map + + +def restore_bplut(path_or_buffer: Union[BinaryIO, str]) -> dict: + ''' + NOTE: I manually exported Maosheng's fixed-width version (fixed-width + files are a crime) to CSV for easier handling. + + Parameters + ---------- + path_or_buffer : str or buffer + File path or buffer representing the BPLUT to be read + + Returns + ------- + dict + ''' + # Remaps Maosheng's PFT order to the actual PFT code from MCD12Q1 + # LC_Type2 + pft_lookup = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12] + data = pd.read_csv(path_or_buffer) + # Create a dictionary with an array for every key + output = dict([ + (k, np.full((13,), np.nan)) + for k in BPLUT_FIELD_LOOKUP.values() + ]) + # Assumes the first column indexes the parameter/ field names + field_index = data.columns[0] + pft_index = list(data.columns) + pft_index.remove(field_index) + for k, key in enumerate(data[field_index]): + values = data.loc[data[field_index] == key, pft_index].values.ravel() + output[BPLUT_FIELD_LOOKUP[key]][pft_lookup] = values + return output + + +def write_bplut(params_dict: dict, output_path: str): + ''' + Writes a BPLUT parameters dictionary to an output CSV file. + + Parameters + ---------- + params_dict : dict + output_path : str + The output CSV file path + ''' + template = os.path.join( + os.path.dirname(mod17.__file__), 'data/MOD17_BPLUT_C5.1_MERRA_NASA.csv') + with open(template, 'r') as file: + reader = csv.reader(file) + for line in reader: + if reader.line_num > 1: + break + header = line + with open(output_path, 'w') as file: + writer = csv.writer(file) + writer.writerow(header) + for name, key in BPLUT_FIELD_LOOKUP.items(): + values = [] + for pft in mod17.PFT_VALID: + values.append(params_dict[key][pft]) + writer.writerow((name, *values)) + + +def rmsd( + params: Sequence, func: Callable = None, observed: Sequence = None, + drivers: Sequence = None) -> float: + ''' + The root-mean scquared deviation. This function is intended to be used + in a multiprocessing context (with functools.partial()). 
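+
+    For example (a sketch using hypothetical names `my_model`, `y_obs`, and
+    `x_drivers`), bind the fixed arguments first and score only the
+    parameters:
+
+        from functools import partial
+        objective = partial(
+            rmsd, func = my_model, observed = y_obs, drivers = x_drivers)
+        score = objective(params)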
+ + Parameters + ---------- + params : Sequence + Sequence of expected model parameters + func : Callable + The function to call to generate predicted values; function should + expect to receive positional arguments, the first being a sequence + of model parameters and every subsequent argument an input array + observed : Sequence + The oberved values + drivers : Sequence + Sequence of expected model drivers + + Returns + ------- + float + ''' + predicted = func(params, *drivers) + return np.sqrt(np.nanmean((predicted - observed) ** 2)) + + +def sites_by_record_length( + array: np.ndarray, dates: np.ndarray, pft_map: np.ndarray, + sites: np.ndarray, n_returned: int = 5, cutoff: float = 0.97, + pft_passed: Sequence = None) -> tuple: + ''' + Ranks sites by the total number of site-years with valid data. Returns + a tuple of (sites, site-years) where sites is the top `n_returned` site + names with the longest site-year record; site-years is a same-length + sequence of randomly chosen years, ordered by highest proportion of valid + data within the year. + + Parameters + ---------- + array : numpy.ndarray + The data record, a (T x N) array for N sites + dates : numpy.ndarray + (T x 3) array of Year, Month, Day for each time step; years must be + consecutive + pft_map : numpy.ndarray + The map of PFTs, a (N,) array for N sites with a subgrid of M cells + sites : numpy.ndarray + (N,) array of site labels + n_returned : int + Number of unique sites to return for each PFT + cutoff : float + (Optional) A cutoff for the proportion of valid data required in each + year; i.e., site-years with a proportion below this cutoff are ignored + when tallying sites by total number of site-years + pft_passed : Sequence + (Optional) A sequence of PFT codes for which the `cutoff` will not be + applied; instead, any site-year proportion above 0 will be considered; + if None, the `cutoff` is applied to all PFTs (Default: None) + + Returns + ------- + tuple + A 2-element tuple of (sites, site-years); each is a (P x Z) + `numpy.ndarray` where P is the number of unique PFTs and Z is the + `n_returned` + ''' + assert array.shape[0] == dates.shape[0],\ + 'Data array and dates array should have the same number of time points' + assert array.shape[1] == pft_map.shape[0],\ + 'Data array and PFT map should have the same number of sites' + all_years = np.unique(dates[:,0]) + site_years = np.zeros((len(all_years), pft_map.shape[0])) + for y, year in enumerate(all_years.ravel()): + # Count the number of days with valid data in each year; normalize by + # the total number of days in the year (366 for leap years) + dmax = 366 if year % 4 == 0 else 365 + site_years[y,:] = ( + dmax - np.isnan(array[dates[:,0] == year]).sum(axis = 0)) / dmax + # Ignore site-year proportions below the cutoff + _site_years = site_years.copy() + site_years = np.where(site_years < cutoff, 0, site_years) + # For simplicity, we copy the data from the original site_years array to + # the new one for those PFTs that we don't want to apply a cutoff to + if pft_passed is not None: + mask = np.repeat(pft_map[None,:], len(all_years), axis = 0) + for each_pft in pft_passed: + site_years[mask == each_pft] = _site_years[mask == each_pft] + # Tally the total "site-years" for each site + site_years_sum = site_years.sum(axis = 0) + # Get a list of unique PFTs + all_pft = np.unique(pft_map[~np.isnan(pft_map)]).tolist() + all_pft.sort() + results_sites = np.chararray( + (len(all_pft), n_returned), itemsize = 6, unicode = True) + results = np.zeros((len(all_pft), 
n_returned), dtype = np.int32) + for p, pft in enumerate(all_pft): + # Sort the site-year data by number of site-years + site_max = site_years_sum[pft_map == pft] + top = sites[pft_map == pft][np.argsort(site_max)][::-1] + if top.size < n_returned: + results_sites[p,0:top.size] = top + else: + results_sites[p,:] = top[0:n_returned] + # Indices of those top sites... + idx = np.argwhere(np.in1d(sites, top[0:n_returned])).ravel() + # Choose a random year in the top n_returned years, unless it is + # PFT 3, in which case we just take the best site-year available + _cutoff = 0 if pft in pft_passed else cutoff + choices = [ + all_years[site_years[:,idx][:,i] > _cutoff] + for i in range(0, len(idx)) + ] + # Shuffle the years within each site + for c in choices: + np.random.shuffle(c) + results[p,0:len(idx)] = [c[0] for c in choices[0:len(idx)]] + return (results_sites, results) + + +def report(hdf, by_pft: bool = False): + ''' + Check that we have everything needed to calibrate MOD17 and print the + report to the screen + + Parameters + ---------- + hdf : h5py.File + by_pft : bool + ''' + NPP_KEYS = ('MOD15A2H_fPAR_clim', 'MOD15A2H_LAI_clim', 'NPP_total_filled') + MERRA2_KEYS = ( + 'LWGNT', 'LWGNT_daytime', 'LWGNT_nighttime', 'PS', 'PS_daytime', + 'PS_nighttime', 'QV10M', 'QV10M_daytime', 'QV10M_nighttime', 'SWGDN', + 'SWGDN_daytime', 'SWGDN_nighttime', 'SWGNT', 'SWGNT_daytime', + 'SWGNT_nighttime', 'T10M', 'T10M_daytime', 'T10M_nighttime', 'Tmin') + + def find(hdf, prefix, key, pad = 10, mask = None): + 'Find a key, print the report' + try: + field = '%s/%s' % (prefix, key) + pretty = ('"%s"' % key).ljust(pad) + if mask is None: + print_stats(hdf[field][:], pad, pretty) + else: + shp = hdf[field].shape + if len(shp) == 1: + print_stats(hdf[field][mask], pad, pretty) + if len(shp) == 2: + print_stats(hdf[field][:,mask], pad, pretty) + elif len(shp) == 3: + print_stats(hdf[field][:,mask,:], pad, pretty) + except KeyError: + pretty = ('"%s"' % key).ljust(pad) + print('-- MISSING %s' % pretty) + + def print_stats(data, pad, pretty): + shp = ' x '.join(map(str, data.shape)) + shp = ('[%s]' % shp).ljust(pad + 7) + stats = tuple(summarize(data)) + stats_pretty = '' + if stats[0] is not None: + stats_pretty = '[%.2f, %.2f] (%.2f)' % (stats[0], stats[2], stats[1]) + if len(key) < 10: + print('-- Found %s %s %s' % (pretty, shp, stats_pretty)) + else: + print('-- Found %s' % pretty) + print('%s%s %s' % (''.rjust(pad + 10), shp, stats_pretty)) + + def summarize(data, nodata = -9999): + 'Get summary statistics for a field' + if str(data.dtype).startswith('int'): + return (None for i in range(0, 3)) + data[data == -9999] = np.nan + return ( + getattr(np, f)(data) for f in ('nanmin', 'nanmean', 'nanmax') + ) + + print('\nMOD17: Checking for required driver variables...') + enum = range(0, 1) if not by_pft else (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12) + pft_map = None + for i in enum: + if by_pft: + pft_map = np.arange(0, hdf['NPP/PFT'].size)[hdf['NPP/PFT'][:] == i] + print('\n--------------------') + print('\n-- PFT == %d' % i) + for key in NPP_KEYS: + find(hdf, 'NPP', key, mask = pft_map) + for key in MERRA2_KEYS: + find(hdf, 'NPP/surface_met_MERRA2', key, mask = pft_map) + print('') + + +def vnp15a2h_qc_fail(x): + ''' + Returns pass/fail for QC flags based on the L4C fPAR QC protocol for the + `FparLai_QC` band: Bad pixels have either `11` in the first two bits + ("Fill Value") or anything other than `0` in the 3rd least-significant + bits, which combines "Pixel not produced at all". 
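+    (In terms of the unpacked, big-endian bit array used below: fail when the
+    two most significant bits are both 1, or when the bit at index 5, i.e.,
+    the 3rd least-significant bit, is 1.)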
For example, see decimal + number 80: + + 0101|0|000 where "000" is the combined (Fill bit | Retrieval quality) + + Parameters + ---------- + x : numpy.ndarray + Unsigned, 8-bit integer array + + Returns + ------- + numpy.ndarray + Boolean array + ''' + y = dec2bin_unpack(x) + # Emit 1 = FAIL if sum("11") == 2; "BiomeType" == "Filled Value" + c1 = np.where(y[...,0:2].sum(axis = -1) == 2, 1, 0).astype(np.uint8) + # Emit 1 = FAIL if 3rd bit == 1; "SCF_QC" == "Pixel not produced at all" + c2 = y[...,5] + # Intermediate arrays are 1 = FAIL, 0 = PASS + return (c1 + c2) > 0 + + +def vnp15a2h_cloud_fail(x): + ''' + Returns pass/fail for QC flags based on the L4C fPAR QC protocol for the + `FparExtra_QC` band (cloud QC band): Bad pixels have anything OTHER THAN + `1` second least-significant bit; `00` and `01` being acceptable cloud QC + flags ("Confident clear" and "Probably clear", respectively). + + Parameters + ---------- + x : numpy.ndarray + Unsigned, 8-bit integer array + + Returns + ------- + numpy.ndarray + Boolean array + ''' + y = dec2bin_unpack(x) + return y[...,-2] > 0 diff --git a/mod17/viirs.py b/mod17/viirs.py new file mode 100644 index 0000000..2bf4e8d --- /dev/null +++ b/mod17/viirs.py @@ -0,0 +1,468 @@ +''' +Calibration of MOD17 against a representative, global eddy covariance (EC) +flux tower network. The model calibration is based on Markov-Chain Monte +Carlo (MCMC). This module is for calibrating using VIIRS reflectance, fPAR, +and LAI data, specifically. +''' + +import datetime +import json +import os +import warnings +import numpy as np +import h5py +import arviz as az +import pymc as pm +import aesara.tensor as at +import mod17 +from numbers import Number +from typing import Callable, Sequence +from pathlib import Path +from matplotlib import pyplot +from mod17 import MOD17, PFT_VALID +from mod17.utils import pft_dominant, restore_bplut, write_bplut +from mod17.calibration import BlackBoxLikelihood, MOD17StochasticSampler, CalibrationAPI + +MOD17_DIR = os.path.dirname(mod17.__file__) +# This matplotlib setting prevents labels from overplotting +pyplot.rcParams['figure.constrained_layout.use'] = True + + +class VNP17StochasticSampler(MOD17StochasticSampler): + ''' + A Markov Chain-Monte Carlo (MCMC) sampler for MOD17. The specific sampler + used is the Differential Evolution (DE) MCMC algorithm described by + Ter Braak (2008), though the implementation is specific to the PyMC3 + library. + + Considerations: + + 1. Tower GPP is censored when values are < 0 or when APAR is + < 0.1 MJ m-2 d-1. + + Parameters + ---------- + config : dict + Dictionary of configuration parameters + model : Callable + The function to call (with driver data and parameters); this function + should take driver data as positional arguments and the model + parameters as a `*Sequence`; it should require no external state. 
+ observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + params_dict : dict or None + Dictionary of model parameters, to be used as initial values and as + the basis for constructing a new dictionary of optimized parameters + backend : str or None + Path to a NetCDF4 file backend (Default: None) + weights : Sequence or None + Optional sequence of weights applied to the model residuals (as in + weighted least squares) + ''' + # NOTE: This is different than for mod17.MOD17 because we haven't yet + # figured out how the respiration terms are calculated + required_parameters = { + 'GPP': ['LUE_max', 'tmin0', 'tmin1', 'vpd0', 'vpd1'], + 'NPP': MOD17.required_parameters + } + required_drivers = { + 'GPP': ['fPAR', 'Tmin', 'VPD', 'PAR'], + 'NPP': ['fPAR', 'Tmin', 'VPD', 'PAR', 'LAI', 'Tmean', 'years'] + } + + def compile_gpp_model( + self, observed: Sequence, drivers: Sequence) -> pm.Model: + ''' + Creates a new GPP model based on the prior distribution. Model can be + re-compiled multiple times, e.g., for cross validation. + + Parameters + ---------- + observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + drivers : list or tuple + Sequence of driver datasets to be supplied, in order, to the + model's run function + + Returns + ------- + pm.Model + ''' + # Define the objective/ likelihood function + log_likelihood = BlackBoxLikelihood( + self.model, observed, x = drivers, weights = self.weights) + # With this context manager, "all PyMC3 objects introduced in the indented + # code block...are added to the model behind the scenes." + with pm.Model() as model: + # (Stochstic) Priors for unknown model parameters + LUE_max = pm.TruncatedNormal('LUE_max', + **self.prior['LUE_max'], **self.bounds['LUE_max']) + # NOTE: All environmental scalars are fixed at their updated + # MOD17 values + tmin0 = self.params['tmin0'] + tmin1 = self.params['tmin1'] + vpd0 = self.params['vpd0'] + vpd1 = self.params['vpd1'] + # Convert model parameters to a tensor vector + params_list = [LUE_max, tmin0, tmin1, vpd0, vpd1] + params = at.as_tensor_variable(params_list) + # Key step: Define the log-likelihood as an added potential + pm.Potential('likelihood', log_likelihood(params)) + return model + + def compile_npp_model( + self, observed: Sequence, drivers: Sequence) -> pm.Model: + ''' + Creates a new NPP model based on the prior distribution. Model can be + re-compiled multiple times, e.g., for cross validation. + + Parameters + ---------- + observed : Sequence + Sequence of observed values that will be used to calibrate the model; + i.e., model is scored by how close its predicted values are to the + observed values + drivers : list or tuple + Sequence of driver datasets to be supplied, in order, to the + model's run function + + Returns + ------- + pm.Model + ''' + # Define the objective/ likelihood function + log_likelihood = BlackBoxLikelihood( + self.model, observed, x = drivers, weights = self.weights) + # With this context manager, "all PyMC3 objects introduced in the indented + # code block...are added to the model behind the scenes." 
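+        # The fixed GPP parameters below are folded into the same parameter
+        #   vector as the sampled NPP parameters, so the likelihood always
+        #   receives the complete vector expected by the NPP model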
+ with pm.Model() as model: + # Setting GPP parameters that are known + LUE_max = self.params['LUE_max'] + tmin0 = self.params['tmin0'] + tmin1 = self.params['tmin1'] + vpd0 = self.params['vpd0'] + vpd1 = self.params['vpd1'] + # SLA fixed at prior mean + SLA = np.exp(self.prior['SLA']['mu']) + # Allometry ratios prescribe narrow range around Collection 6.1 values + froot_leaf_ratio = pm.Triangular( + 'froot_leaf_ratio', **self.prior['froot_leaf_ratio']) + Q10_froot = pm.TruncatedNormal( + 'Q10_froot', **self.prior['Q10_froot'], **self.bounds['Q10']) + leaf_mr_base = pm.LogNormal( + 'leaf_mr_base', **self.prior['leaf_mr_base']) + froot_mr_base = pm.LogNormal( + 'froot_mr_base', **self.prior['froot_mr_base']) + # For GRS and CRO, livewood mass and respiration are zero + if list(self.prior['livewood_mr_base'].values()) == [0, 0]: + livewood_leaf_ratio = 0 + livewood_mr_base = 0 + Q10_livewood = 0 + else: + livewood_leaf_ratio = pm.Triangular( + 'livewood_leaf_ratio', **self.prior['livewood_leaf_ratio']) + livewood_mr_base = pm.LogNormal( + 'livewood_mr_base', **self.prior['livewood_mr_base']) + Q10_livewood = pm.TruncatedNormal( + 'Q10_livewood', **self.prior['Q10_livewood'], + **self.bounds['Q10']) + # Convert model parameters to a tensor vector + params_list = [ + LUE_max, tmin0, tmin1, vpd0, vpd1, SLA, + Q10_livewood, Q10_froot, froot_leaf_ratio, livewood_leaf_ratio, + leaf_mr_base, froot_mr_base, livewood_mr_base + ] + params = at.as_tensor_variable(params_list) + # Key step: Define the log-likelihood as an added potential + pm.Potential('likelihood', log_likelihood(params)) + return model + + +class VIIRSCalibrationAPI(CalibrationAPI): + ''' + Convenience class for calibrating the MOD17 GPP and NPP models. Meant to + be used with `fire.Fire()`. + ''' + def __init__(self, config = None): + config_file = config + if config_file is None: + config_file = os.path.join( + MOD17_DIR, 'data/MOD17_calibration_config.json') + with open(config_file, 'r') as file: + self.config = json.load(file) + self.hdf5 = self.config['data']['file'] + + def tune_gpp( + self, pft: int, plot_trace: bool = False, ipdb: bool = False, + save_fig: bool = False, **kwargs): + ''' + Run the VNP17 GPP calibration. 
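+
+        Intended to be invoked through `fire.Fire()`; for example (an
+        illustrative command line, assuming this module is run as a script):
+
+            python viirs.py tune_gpp --pft=1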
+ + Parameters + ---------- + pft : int + The Plant Functional Type (PFT) to calibrate + plot_trace : bool + True to plot the trace for a previous calibration run; this will + also NOT start a new calibration (Default: False) + ipdb : bool + True to drop the user into an ipdb prompt, prior to and instead of + running calibration + save_fig : bool + True to save figures to files instead of showing them + (Default: False) + **kwargs + Additional keyword arguments passed to + `VNP17StochasticSampler.run()` + ''' + assert pft in PFT_VALID, f'Invalid PFT: {pft}' + params_dict = restore_bplut(self.config['BPLUT']['GPP']) + # Load blacklisted sites (if any) + blacklist = self.config['data']['sites_blacklisted'] + # Filter the parameters to just those for the PFT of interest + params_dict = dict([(k, v[pft]) for k, v in params_dict.items()]) + model = MOD17(params_dict) + objective = self.config['optimization']['objective'].lower() + + print('Loading driver datasets...') + with h5py.File(self.hdf5, 'r') as hdf: + sites = hdf['FLUXNET/site_id'][:] + if hasattr(sites[0], 'decode'): + sites = list(map(lambda x: x.decode('utf-8'), sites)) + # Get dominant PFT + pft_map = pft_dominant(hdf['state/PFT'][:], site_list = sites) + # Blacklist various sites + pft_mask = np.logical_and(pft_map == pft, ~np.in1d(sites, blacklist)) + dates = hdf['time'][:] + # For expedience, subset all data to the VIIRS post-launch period + cutoff = np.argwhere(dates[:,0] == 2012).min() + weights = hdf['weights'][pft_mask] + # NOTE: Converting from Kelvin to Celsius + tday = hdf['MERRA2/T10M_daytime'][:][cutoff:,pft_mask] - 273.15 + qv10m = hdf['MERRA2/QV10M_daytime'][:][cutoff:,pft_mask] + ps = hdf['MERRA2/PS_daytime'][:][cutoff:,pft_mask] + drivers = [ # fPAR, Tmin, VPD, PAR + hdf['VIIRS/VNP15A2HGF_fPAR_interp'][:][cutoff:,pft_mask], + hdf['MERRA2/Tmin'][:][cutoff:,pft_mask] - 273.15, + MOD17.vpd(qv10m, ps, tday), + MOD17.par(hdf['MERRA2/SWGDN'][:][cutoff:,pft_mask]), + ] + # Set negative VPD to zero + drivers[2] = np.where(drivers[2] < 0, 0, drivers[2]) + # Convert fPAR from (%) to [0,1] + drivers[0] = np.nanmean(drivers[0], axis = -1) / 100 + # If RMSE is used, then we want to pay attention to weighting + weights = None + if objective in ('rmsd', 'rmse'): + weights = hdf['weights'][pft_mask][np.newaxis,:]\ + .repeat(tday.shape[0], axis = 0) + for d, each in enumerate(drivers): + name = ('fPAR', 'Tmin', 'VPD', 'PAR')[d] + assert not np.isnan(each).any(),\ + f'Driver dataset "{name}" contains NaNs' + tower_gpp = hdf['FLUXNET/GPP'][:][cutoff:,pft_mask] + # Read the validation mask; mask out observations that are + # reserved for validation + print('Masking out validation data...') + mask = hdf['FLUXNET/validation_mask_VNP17'][pft] + tower_gpp[mask] = np.nan + + # Clean observations, then mask out driver data where the are no + # observations + tower_gpp = self.clean_observed( + tower_gpp, drivers, VNP17StochasticSampler.required_drivers['GPP'], + protocol = 'GPP') + if weights is not None: + weights = weights[~np.isnan(tower_gpp)] + for d, _ in enumerate(drivers): + drivers[d] = drivers[d][~np.isnan(tower_gpp)] + tower_gpp = tower_gpp[~np.isnan(tower_gpp)] + + print('Initializing sampler...') + backend = self.config['optimization']['backend_template'] % ('GPP', pft) + sampler = VNP17StochasticSampler( + self.config, MOD17._gpp, params_dict, backend = backend, + weights = weights) + if plot_trace or ipdb: + if ipdb: + import ipdb + ipdb.set_trace() + trace = sampler.get_trace() + az.plot_trace(trace, var_names = 
VNP17.required_parameters[0:5]) + pyplot.show() + return + # Get (informative) priors for just those parameters that have them + with open(self.config['optimization']['prior'], 'r') as file: + prior = json.load(file) + prior_params = filter( + lambda p: p in prior.keys(), sampler.required_parameters['GPP']) + prior = dict([ + (p, {'mu': prior[p]['mu'][pft], 'sigma': prior[p]['sigma'][pft]}) + for p in prior_params + ]) + sampler.run( + tower_gpp, drivers, prior = prior, save_fig = save_fig, **kwargs) + + def tune_npp( + self, pft: int, plot_trace: bool = False, ipdb: bool = False, + save_fig: bool = False, climatology = False, + cutoff: Number = 2385, k_folds: int = 1, **kwargs): + ''' + Run the VNP17 NPP calibration. + + Parameters + ---------- + pft : int + The Plant Functional Type (PFT) to calibrate + plot_trace : bool + True to display the trace plot ONLY and not run calibration + (Default: False) + ipdb : bool + True to drop into an interactive Python debugger (`ipdb`) after + loading an existing trace (Default: False) + save_fig : bool + True to save the post-calibration trace plot to a file instead of + displaying it (Default: False) + climatology : bool + True to use a MERRA-2 climatology (and look for it in the drivers + file), i.e., use `MERRA2_climatology` group instead of + `surface_met_MERRA2` group (Default: False) + cutoff : Number + Maximum value of observed NPP (g C m-2 year-1); values above this + cutoff will be discarded and not used in calibration + (Default: 2385) + k_folds : int + Number of folds to use in k-folds cross-validation; defaults to + k=1, i.e., no cross-validation is performed. + **kwargs + Additional keyword arguments passed to + `VNP17StochasticSampler.run()` + ''' + assert pft in PFT_VALID, f'Invalid PFT: {pft}' + prefix = 'MERRA2_climatology' if climatology else 'surface_met_MERRA2' + params_dict = restore_bplut(self.config['BPLUT']['NPP']) + # Filter the parameters to just those for the PFT of interest + params_dict = dict([(k, v[pft]) for k, v in params_dict.items()]) + model = MOD17(params_dict) + kwargs.update({'var_names': [ + '~LUE_max', '~tmin0', '~tmin1', '~vpd0', '~vpd1', '~log_likelihood' + ]}) + # Pass configuration parameters to VNP17StochasticSampler.run() + for key in ('chains', 'draws', 'tune', 'scaling'): + if key in self.config['optimization'].keys(): + kwargs[key] = self.config['optimization'][key] + + print('Loading driver datasets...') + with h5py.File(self.hdf5, 'r') as hdf: + # NOTE: This is only recorded at the site-level; no need to + # determine modal PFT across subgrid + pft_map = hdf['NPP/PFT'][:] + # Leave out sites where there is no fPAR (and no LAI) data + fpar = hdf['NPP/MOD15A2H_fPAR_clim'][:] + mask = np.logical_and( + pft_map == pft, ~np.isnan(np.nanmean(fpar, axis = -1))\ + .all(axis = 0)) + # NOTE: Converting from Kelvin to Celsius + tday = hdf[f'NPP/{prefix}/T10M_daytime'][:][:,mask] - 273.15 + qv10m = hdf[f'NPP/{prefix}/QV10M_daytime'][:][:,mask] + ps = hdf[f'NPP/{prefix}/PS_daytime'][:][:,mask] + drivers = [ # fPAR, Tmin, VPD, PAR, LAI, Tmean, years + hdf['NPP/VNP15A2H_fPAR_clim'][:][:,mask], + hdf[f'NPP/{prefix}/Tmin'][:][:,mask] - 273.15, + MOD17.vpd(qv10m, ps, tday), + MOD17.par(hdf[f'NPP/{prefix}/SWGDN'][:][:,mask]), + hdf['NPP/VNP15A2H_LAI_clim'][:][:,mask], + hdf[f'NPP/{prefix}/T10M'][:][:,mask] - 273.15, + np.full(ps.shape, 1) # i.e., A 365-day climatological year ("Year 1") + ] + observed_npp = hdf['NPP/NPP_total'][:][mask] + if cutoff is not None: + observed_npp[observed_npp > cutoff] = np.nan + # Set 
negative VPD to zero + drivers[2] = np.where(drivers[2] < 0, 0, drivers[2]) + # Convert fPAR from (%) to [0,1] and re-scale LAI; reshape fPAR, LAI + drivers[0] = np.nanmean(drivers[0], axis = -1) * 0.01 + drivers[4] = np.nanmean(drivers[4], axis = -1) * 0.1 + # Mask out driver data where the are no observations + for d, _ in enumerate(drivers): + drivers[d] = drivers[d][:,~np.isnan(observed_npp)] + observed_npp = observed_npp[~np.isnan(observed_npp)] + + if k_folds > 1: + # Back-up the original (complete) datasets + _drivers = [d.copy() for d in drivers] + _observed_npp = observed_npp.copy() + # Randomize the indices of the NPP data + indices = np.arange(0, observed_npp.size) + np.random.shuffle(indices) + # Get the starting and ending index of each fold + fold_idx = np.array([indices.size // k_folds] * k_folds) * np.arange(0, k_folds) + fold_idx = list(map(list, zip(fold_idx, fold_idx + indices.size // k_folds))) + # Ensure that the entire dataset is used + fold_idx[-1][-1] = indices.max() + idx_test = [indices[start:end] for start, end in fold_idx] + + for k, fold in enumerate(range(1, k_folds + 1)): + backend = self.config['optimization']['backend_template'] % ('NPP', pft) + if k_folds > 1: + # Create an HDF5 file with the same name as the (original) + # netCDF4 back-end, store the test indices + with h5py.File(backend.replace('nc4', 'h5'), 'w') as hdf: + out = list(idx_test) + size = indices.size // k_folds + try: + out = np.stack(out) + except ValueError: + size = max((o.size for o in out)) + for i in range(0, len(out)): + out[i] = np.concatenate((out[i], [np.nan] * (size - out[i].size))) + hdf.create_dataset( + 'test_indices', (k_folds, size), np.int32, np.stack(out)) + backend = self.config['optimization']['backend_template'] % (f'NPP-k{fold}', pft) + # Restore the original NPP dataset + observed_npp = _observed_npp.copy() + # Set to NaN all the test indices + idx = idx_test[k] + observed_npp[idx] = np.nan + # Same for drivers, after restoring from the original + drivers = [d.copy()[:,~np.isnan(observed_npp)] for d in _drivers] + observed_npp = observed_npp[~np.isnan(observed_npp)] + + print('Initializing sampler...') + sampler = VNP17StochasticSampler( + self.config, MOD17._npp, params_dict, backend = backend, + model_name = 'NPP') + if plot_trace or ipdb: + if ipdb: + import ipdb + ipdb.set_trace() + trace = sampler.get_trace() + az.plot_trace(trace, var_names = MOD17.required_parameters[5:]) + pyplot.show() + return + # Get (informative) priors for just those parameters that have them + with open(self.config['optimization']['prior'], 'r') as file: + prior = json.load(file) + prior_params = filter( + lambda p: p in prior.keys(), sampler.required_parameters['NPP']) + prior = dict([ + (p, prior[p]) for p in prior_params + ]) + for key in prior.keys(): + # And make sure to subset to the chosen PFT! + for arg in prior[key].keys(): + prior[key][arg] = prior[key][arg][pft] + sampler.run( + observed_npp, drivers, prior = prior, save_fig = save_fig, + **kwargs) + + +if __name__ == '__main__': + import fire + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + fire.Fire(VIIRSCalibrationAPI) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..c3a4358 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,33 @@ +[metadata] +name = mod17 +version = 0.1.0 +author = K. 
Arthur Endsley +author_email = arthur.endsley@ntsg.umt.edu +description = Python tools for MOD17 algorithm +classifiers = + Programming Language :: Python :: 3 + Development Status :: 3 - Alpha + Intended Audience :: Science/Research + Topic :: Scientific/Engineering + +[options] +packages = mod17 +py_modules = mod17.data, mod17.science, mod17.srs, mod17.utils, mod17.calibration, mod17.sensitivity, mod17.simulation +python_requires = >=3.8.0 +install_requires = + h5py>=3.4.0 + netCDF4>=1.5.7 + numpy<=1.21.5 + scipy>=1.7.0 + xarray>=0.19.0 + suntransit>=0.1.0 + tqdm>=4.60.0 + fire>=0.4.0 + +[options.package_data] +* = data/*.csv + +[options.extras_require] +docs = pdoc3>=0.9.2 +sensitivity = SALib>=1.4.5 +simulation = pyl4c>=0.1.0; affine>=2.3.0; rasterio>=1.2.0; affine>=2.3.0; pyproj>=3.3.0 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..5bb4a11 --- /dev/null +++ b/setup.py @@ -0,0 +1,3 @@ +from distutils.core import setup + +setup() diff --git a/tests/tests.py b/tests/tests.py new file mode 100644 index 0000000..dfca54d --- /dev/null +++ b/tests/tests.py @@ -0,0 +1,359 @@ +''' +Unit tests for the mod17 Python utilities library. +''' + +import os +import unittest +import numpy as np +import mod17 +from mod17 import MOD17 +from mod17.utils import restore_bplut +from mod17.srs import modis_from_wgs84, modis_to_wgs84, modis_tile_from_wgs84, modis_row_col_from_wgs84, modis_row_col_to_wgs84 + +MOD17_BPLUT = os.path.join( + os.path.dirname(mod17.__file__), 'data/MOD17_BPLUT_CX.X_MERRA_NASA.csv') + +class GPP(unittest.TestCase): + ''' + Suite of GPP test cases based on the Collection 5.1 BPLUT. + ''' + @classmethod + def setUp(cls): + cls.pft = np.arange(0, 13) + cls.params = dict([ + (k, v[cls.pft]) for k, v in restore_bplut(MOD17_BPLUT).items() + ]) + cls.drivers_annual = [ # Sequence of daily driver data over 1 year + np.repeat(np.array((0.5,)), 365)[:,np.newaxis]\ + .repeat(cls.pft.size, axis = -1), # fPAR + np.repeat(np.array((15,)), 365)[:,np.newaxis]\ + .repeat(cls.pft.size, axis = -1), # Tmin + np.repeat(np.array((1000,)), 365)[:,np.newaxis]\ + .repeat(cls.pft.size, axis = -1), # VPD + np.repeat(np.array((10,)), 365)[:,np.newaxis]\ + .repeat(cls.pft.size, axis = -1), # PAR + np.repeat(np.array((1,)), 365)[:,np.newaxis]\ + .repeat(cls.pft.size, axis = -1), # LAI + np.repeat(np.array((25,)), 365)[:,np.newaxis]\ + .repeat(cls.pft.size, axis = -1), # Tmean + np.repeat(np.array((2010,)), 365)[:,np.newaxis]\ + .repeat(cls.pft.size, axis = -1) # years + ] + + def test_gpp(self): + 'Should model GPP as expected' + model = MOD17(self.params) + par = model.par(100) + answer = np.array([ + np.nan, 2., 2.73, 2.04, 2.51, 2.01, 2.63, + 1.81, 2.37, 2.31, 1.91, np.nan, 2.07 + ]) + pred = model.daily_gpp(fpar = 0.5, tmin = 10, vpd = 1000, par = par)\ + .round(2) + self.assertTrue( + np.logical_or(np.isnan(pred), np.equal(pred, answer)).all()) + + def test_gpp_static_method(self): + 'Should model GPP as expected, from class method' + model = MOD17(self.params) + params = [ + self.params[p][2] # i.e., PFT = 2 + for p in ('LUE_max', 'tmin0', 'tmin1', 'vpd0', 'vpd1') + ] + par = model.par(100) + predicted = MOD17._gpp(params, 0.5, 10, 1000, par).round(2) + self.assertTrue(np.equal(predicted, 2.73).all()) + + def test_gpp_temperature_ramp(self): + 'Should accurately model GPP over a range of temperatures' + tmin_sweep = np.arange(-10, 35, 5) + answer = np.array([ + np.full((9,), np.nan), # i.e., PFT == 0 + [0., 0.37, 0.98, 1.6, 2., 2., 2., 2., 2. 
], + [0., 0.48, 1.28, 2.08, 2.73, 2.73, 2.73, 2.73, 2.73], + [0., 0.34, 0.91, 1.48, 2.04, 2.09, 2.09, 2.09, 2.09], + [0., 0.16, 0.94, 1.73, 2.51, 2.51, 2.51, 2.51, 2.51], + [0., 0.24, 0.85, 1.46, 2.01, 2.01, 2.01, 2.01, 2.01], + [0., 0.47, 1.27, 2.06, 2.63, 2.63, 2.63, 2.63, 2.63], + [0., 0.32, 0.86, 1.4, 1.81, 1.81, 1.81, 1.81, 1.81], + [0., 0.4, 1.05, 1.71, 2.37, 2.55, 2.55, 2.55, 2.55], + [0., 0.39, 1.03, 1.67, 2.31, 2.49, 2.49, 2.49, 2.49], + [0., 0.32, 0.85, 1.38, 1.91, 2.13, 2.13, 2.13, 2.13], + np.full((9,), np.nan), # i.e, PFT == 11 + [0., 0.34, 0.92, 1.49, 2.07, 2.3, 2.3, 2.3, 2.3 ]]) + model = MOD17(self.params) + for i, tmin in enumerate(tmin_sweep.tolist()): + par = model.par(100) + pred = model.daily_gpp(fpar = 0.5, tmin = tmin, vpd = 1000, par = par).round(2) + self.assertTrue(np.logical_or( + np.isnan(pred), + np.equal(answer[:,i], pred)).all()) + + def test_gpp_vpd_ramp(self): + 'Should accurately model GPP over a range of VPD' + vpd_sweep = np.arange(0, 6000, 1000) + answer = np.array([ + np.full((6,), np.nan), # i.e., PFT == 0 + [2.35, 2. , 1. , 0. , 0. , 0.], + [2.73, 2.73, 1.82, 0.91, 0. , 0.], + [2.39, 2.09, 1.26, 0.42, 0. , 0.], + [2.97, 2.51, 1.19, 0. , 0. , 0.], + [2.38, 2.01, 0.95, 0. , 0. , 0.], + [2.91, 2.63, 1.83, 1.04, 0.24, 0.], + [2. , 1.81, 1.28, 0.75, 0.21, 0.], + [2.91, 2.55, 1.53, 0.51, 0. , 0.], + [2.83, 2.49, 1.53, 0.57, 0. , 0.], + [2.36, 2.13, 1.46, 0.8 , 0.13, 0.], + np.full((6,), np.nan), # i.e., PFT == 11 + [2.53, 2.3 , 1.64, 0.98, 0.33, 0.]]) + model = MOD17(self.params) + for i, vpd in enumerate(vpd_sweep.tolist()): + par = model.par(100) + pred = model.daily_gpp( + fpar = 0.5, tmin = 20, vpd = vpd, par = par).round(2) + self.assertTrue(np.logical_or( + np.isnan(pred), + np.equal(answer[:,i], pred)).all()) + + def test_daily_respiration(self): + 'Should accurately calculate daily respiration' + lai = 1 + tmean = 25 + model = MOD17(self.params) + pred_leaf, pred_froot = model.daily_respiration(lai, tmean) + self.assertTrue(np.logical_or( + np.isnan(pred_leaf), + np.equal(pred_leaf.round(3), np.array([ + np.nan, 0.579, 0.323, 0.694, 0.453, 0.495, 1.33, 0.622, 0.434, + 0.433, 0.371, np.nan, 0.371 + ])) + ).all()) + self.assertTrue(np.logical_or( + np.isnan(pred_froot), + np.equal(pred_froot.round(3), np.array([ + np.nan, 0.587, 0.3, 0.738, 0.327, 0.357, 0.781, 0.795, 0.459, + 0.457, 0.792, np.nan, 0.61 + ])) + ).all()) + + def test_daily_respiration_lai_ramp(self): + 'Should accurately calculate daily respiration over range of LAI' + lai_sweep = np.array((0.1, 1, 2, 3, 4.5)) + tmean = 25 + model = MOD17(self.params) + answer1 = np.array([ + [np.nan, 0.06, 0.03, 0.07, 0.05, 0.05, 0.13, 0.06, 0.04, 0.04, 0.04, np.nan, 0.04], + [np.nan, 0.58, 0.32, 0.69, 0.45, 0.50, 1.33, 0.62, 0.43, 0.43, 0.37, np.nan, 0.37], + [np.nan, 1.16, 0.65, 1.39, 0.91, 0.99, 2.66, 1.24, 0.87, 0.87, 0.74, np.nan, 0.74], + [np.nan, 1.74, 0.97, 2.08, 1.36, 1.49, 3.99, 1.87, 1.3 , 1.30, 1.11, np.nan, 1.11], + [np.nan, 2.61, 1.45, 3.12, 2.04, 2.23, 5.99, 2.80, 1.95, 1.95, 1.67, np.nan, 1.67], + ]) + answer2 = np.array([ + [np.nan, 0.06, 0.03, 0.07, 0.03, 0.04, 0.08, 0.08, 0.05, 0.05, 0.08, np.nan, 0.06], + [np.nan, 0.59, 0.30, 0.74, 0.33, 0.36, 0.78, 0.80, 0.46, 0.46, 0.79, np.nan, 0.61], + [np.nan, 1.17, 0.60, 1.48, 0.65, 0.71, 1.56, 1.59, 0.92, 0.91, 1.58, np.nan, 1.22], + [np.nan, 1.76, 0.90, 2.21, 0.98, 1.07, 2.34, 2.39, 1.38, 1.37, 2.38, np.nan, 1.83], + [np.nan, 2.64, 1.35, 3.32, 1.47, 1.61, 3.51, 3.58, 2.06, 2.06, 3.57, np.nan, 2.74], + ]) + for i, lai in 
enumerate(lai_sweep.tolist()): + pred1, pred2 = model.daily_respiration(lai, tmean) + pred1 = pred1.round(2) + pred2 = pred2.round(2) + self.assertTrue(np.logical_or( + np.isnan(answer1[i]), np.equal(answer1[i], pred1)).all()) + self.assertTrue(np.logical_or( + np.isnan(answer2[i]), np.equal(answer2[i], pred2)).all()) + + def test_daily_respiration_temperature_ramp(self): + 'Should accurately calculate daily respiration over temperature range' + tmean_sweep = np.arange(-10, 35, 5) + lai = 1 + model = MOD17(self.params) + answer1 = np.array([ + [np.nan, 0.01, 0. , 0.01, 0.01, 0.01, 0.02, 0.01, 0.01, 0.01, 0.01, np.nan, 0.01], + [np.nan, 0.02, 0.01, 0.02, 0.01, 0.02, 0.04, 0.02, 0.01, 0.01, 0.01, np.nan, 0.01], + [np.nan, 0.04, 0.02, 0.05, 0.03, 0.03, 0.09, 0.04, 0.03, 0.03, 0.02, np.nan, 0.02], + [np.nan, 0.08, 0.04, 0.09, 0.06, 0.07, 0.18, 0.08, 0.06, 0.06, 0.05, np.nan, 0.05], + [np.nan, 0.15, 0.08, 0.17, 0.11, 0.12, 0.33, 0.16, 0.11, 0.11, 0.09, np.nan, 0.09], + [np.nan, 0.25, 0.14, 0.3 , 0.2 , 0.22, 0.58, 0.27, 0.19, 0.19, 0.16, np.nan, 0.16], + [np.nan, 0.4 , 0.22, 0.48, 0.31, 0.34, 0.92, 0.43, 0.3 , 0.3 , 0.26, np.nan, 0.26], + [np.nan, 0.58, 0.32, 0.69, 0.45, 0.5 , 1.33, 0.62, 0.43, 0.43, 0.37, np.nan, 0.37], + [np.nan, 0.74, 0.41, 0.89, 0.58, 0.63, 1.7 , 0.8 , 0.56, 0.55, 0.47, np.nan, 0.47], + ]) + answer2 = np.array([ + [np.nan, 0.05, 0.03, 0.07, 0.03, 0.03, 0.07, 0.07, 0.04, 0.04, 0.07, np.nan, 0.05], + [np.nan, 0.07, 0.04, 0.09, 0.04, 0.04, 0.1 , 0.1 , 0.06, 0.06, 0.1 , np.nan, 0.08], + [np.nan, 0.1 , 0.05, 0.13, 0.06, 0.06, 0.14, 0.14, 0.08, 0.08, 0.14, np.nan, 0.11], + [np.nan, 0.15, 0.08, 0.18, 0.08, 0.09, 0.2 , 0.2 , 0.11, 0.11, 0.2 , np.nan, 0.15], + [np.nan, 0.21, 0.11, 0.26, 0.12, 0.13, 0.28, 0.28, 0.16, 0.16, 0.28, np.nan, 0.22], + [np.nan, 0.29, 0.15, 0.37, 0.16, 0.18, 0.39, 0.4 , 0.23, 0.23, 0.4 , np.nan, 0.3 ], + [np.nan, 0.42, 0.21, 0.52, 0.23, 0.25, 0.55, 0.56, 0.32, 0.32, 0.56, np.nan, 0.43], + [np.nan, 0.59, 0.3 , 0.74, 0.33, 0.36, 0.78, 0.8 , 0.46, 0.46, 0.79, np.nan, 0.61], + [np.nan, 0.83, 0.42, 1.04, 0.46, 0.51, 1.1 , 1.12, 0.65, 0.65, 1.12, np.nan, 0.86], + ]) + for i, tmean in enumerate(tmean_sweep.tolist()): + pred1, pred2 = model.daily_respiration(lai, tmean) + pred1 = pred1.round(2) + pred2 = pred2.round(2) + self.assertTrue(np.logical_or( + np.isnan(answer1[i]), np.equal(answer1[i], pred1)).all()) + self.assertTrue(np.logical_or( + np.isnan(answer2[i]), np.equal(answer2[i], pred2)).all()) + + def test_annual_respiration(self): + 'Should correctly calculate annual respiration' + lai = np.repeat(np.array((1,)), 365)[:,np.newaxis]\ + .repeat(self.pft.size, axis = -1) + tmean = np.repeat(np.array((25,)), 365)[:,np.newaxis]\ + .repeat(self.pft.size, axis = -1) + years = np.repeat(np.array((2010,)), 365)[:,np.newaxis]\ + .repeat(self.pft.size, axis = -1) + model = MOD17(self.params) + r_leaf, r_froot, r_livewood = model.annual_respiration( + lai, tmean, years) + self.assertTrue(np.equal(r_leaf.round(1), np.array([[ + 0., 211.5, 117.9, 253.2, 165.4, 180.8, 485.5, 227.1, 158.5, + 157.9, 135.4, 0., 135.4 + ]])).all()) + self.assertTrue(np.equal(r_froot.round(1), np.array([[ + 0., 214.3, 109.6, 269.5, 119.3, 130.4, 285., 290.2, 167.4, + 166.9, 289.3, 0., 222.5 + ]])).all()) + self.assertTrue(np.equal(r_livewood.round(1), np.array([[ + 0., 24.9, 12.3, 20., 15.7, 17.2, 18.9, 3.8, 5.1, 0.9, 0., 0., 0. 
+ ]])).all()) + + def test_annual_npp(self): + 'Should correctly calculate annual NPP' + fpar, tmin, vpd, par, lai, tmean, years = self.drivers_annual + model = MOD17(self.params) + npp = model.annual_npp(fpar, tmin, vpd, par, lai, tmean, years) + npp[np.isnan(npp)] = 0 + self.assertTrue(np.equal(npp.round(0), np.array([[ + 0., 1144., 1859., 1137., 1641., 1249., 1342., 943., 1654., + 1610., 1259., 0., 1439. + ]])).all()) + + def test_annual_npp_static_method(self): + 'Should calculate annual NPP same when using low-level API' + pft = 1 + params = [ + self.params[p][pft] for p in MOD17.required_parameters + ] + # Get the driver data for the specific PFT + drivers = map(lambda x: x[:,pft], self.drivers_annual) + npp = MOD17._npp(params, *drivers) + self.assertEqual(npp.round(2), 1144.22) + + +class CoordinateTransformations(unittest.TestCase): + ''' + Suite of tests related to coordinate transformations involving the MODIS + Sinusoidal projection. + ''' + wgs84_coords = [ # (Longitude, Latitude) + ( 30.5, -25.5), + (-50.5, -30.1), + (-10.1, 45.6), + (125.1, 65.5), + ] + sinusoidal_coords = [ + ( 3061072.0, -2835473.8), + (-4858128.1, -3346971.1), + ( -785770.9, 5070494.4), + ( 5768590.8, 7283275.9), + ] + tiles = [ # (h, v) + (20, 11), (13, 12), (17, 4), (23, 2), + ] + row_col_500m = [ + (1319, 1806), (23, 1513), (1055, 703), (1079, 450) + ] + row_col_1000m = [ + (659, 902), (11, 756), (527, 351), (539, 224) + ] + + def test_modis_from_wgs84(self): + 'Should correctly determine Sinusoidal coordinates from WGS84 coords.' + for i, pair in enumerate(self.wgs84_coords): + x, y = modis_from_wgs84(pair) + answer = self.sinusoidal_coords[i] + self.assertTrue( + x.round(1) == answer[0] and y.round(1) == answer[1]) + # Test vectorized version + self.assertTrue(np.equal( + np.stack(self.sinusoidal_coords, axis = -1), + modis_from_wgs84(np.stack(self.wgs84_coords, axis = -1)).round(1) + ).all()) + + def test_modis_to_wgs84(self): + 'Should correctly determine WGS84 coordinates from Sinusoidal coords.' 
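+        # Inverse of the transform checked above; exercises both scalar
+        # coordinate pairs and the vectorized (stacked-array) form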
+ for i, pair in enumerate(self.sinusoidal_coords): + x, y = modis_to_wgs84(pair) + answer = self.wgs84_coords[i] + self.assertTrue( + x.round(1) == answer[0] and y.round(1) == answer[1]) + # Test vectorized version + self.assertTrue(np.equal( + np.stack(self.wgs84_coords, axis = -1), + modis_to_wgs84(np.stack(self.sinusoidal_coords, axis = -1)).round(1) + ).all()) + + def test_modis_row_col_from_wgs84_500m(self): + 'Should correctly determine MODIS row, column from WGS84 coordinates' + for i, pair in enumerate(self.wgs84_coords): + r, c = modis_row_col_from_wgs84(pair, nominal = 500) + answer = self.row_col_500m[i] + self.assertTrue(r == answer[0] and c == answer[1]) + # Test vectorized version + self.assertTrue(np.equal( + np.stack(self.row_col_500m, axis = -1), + modis_row_col_from_wgs84(np.stack(self.wgs84_coords, axis = -1)) + ).all()) + + def test_modis_row_col_from_wgs84_1000m(self): + 'Should correctly determine MODIS row, column from WGS84 coordinates' + for i, pair in enumerate(self.wgs84_coords): + r, c = modis_row_col_from_wgs84(pair, nominal = 1000) + answer = self.row_col_1000m[i] + self.assertTrue(r == answer[0] and c == answer[1]) + + def test_modis_row_col_to_wgs84_500m(self): + 'Should correctly determine WGS84 coordinates from MODIS row, column' + for i, pair in enumerate(self.row_col_500m): + x, y = modis_row_col_to_wgs84( + pair, h = self.tiles[i][0], v = self.tiles[i][1], nominal = 500) + x, y = list(map(lambda x: round(x, 1), (x, y))) + answer = self.wgs84_coords[i] + self.assertTrue(x == answer[0] and y == answer[1]) + # Test vectorized version + self.assertTrue(np.equal( + np.stack(self.wgs84_coords, axis = -1), + modis_row_col_to_wgs84( + np.stack(self.row_col_500m, axis = -1), + *np.stack(self.tiles, axis = -1)).round(1) + ).all()) + + def test_modis_row_col_to_wgs84_1000m(self): + 'Should correctly determine WGS84 coordinates from MODIS row, column' + for i, pair in enumerate(self.row_col_1000m): + x, y = modis_row_col_to_wgs84( + pair, h = self.tiles[i][0], v = self.tiles[i][1], nominal = 1000) + x, y = list(map(lambda x: round(x, 1), (x, y))) + answer = self.wgs84_coords[i] + self.assertTrue(x == answer[0] and y == answer[1]) + + def test_modis_tile_from_wgs84(self): + 'Should correctly determine the MODIS tile from WGS84 coordinates' + for i, pair in enumerate(self.wgs84_coords): + h, v = modis_tile_from_wgs84(pair) + self.assertTrue(h == self.tiles[i][0] and v == self.tiles[i][1]) + # Test vectorized version + self.assertTrue(np.equal( + np.stack(self.tiles, axis = -1), + modis_tile_from_wgs84(np.stack(self.wgs84_coords, axis = 1)) + ).all()) + + +if __name__ == '__main__': + unittest.main()
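
As a quick reference, here is a minimal sketch (not part of the patch itself) of how the coordinate helpers exercised by these tests compose to locate a MODIS pixel for a longitude-latitude pair; the example values are taken from the test fixtures above:

```python
from mod17.srs import modis_tile_from_wgs84, modis_row_col_from_wgs84

lon_lat = (30.5, -25.5)
# Which MODIS tile contains this point?
h, v = modis_tile_from_wgs84(lon_lat)
# Which row/column within that tile, at the nominal 500-m resolution?
row, col = modis_row_col_from_wgs84(lon_lat, nominal = 500)
print(int(h), int(v), int(row), int(col))  # Expected: 20 11 1319 1806
```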