Skip to content

Commit

Permalink
revised
Browse files Browse the repository at this point in the history
  • Loading branch information
frazane committed May 10, 2023
0 parents commit d518207
Show file tree
Hide file tree
Showing 28 changed files with 3,989 additions and 0 deletions.
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
.dev/
.snakemake/
.vscode/
logs/
results/data_partition/
results/preprocess/
results/train/
results/tune
results/figures
results/experiments
slurm/
local/

**/__pycache__
29 changes: 29 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
BSD 3-Clause License

Copyright (c) 2022, Francesco Zanetta
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Physics-constrained deep learning postprocessing of temperature and humidity

Workflow for the physics-constrained deep learning postprocessing of temperature and humidity (paper under review). This work investigates the effect of enforcing dependencies between variables by constraining the optimization of neural networks with thermodynamic state equations.

Pre-print: [https://arxiv.org/abs/2212.04487](https://arxiv.org/abs/2212.04487)

__Installation:__
If using conda, simply replace mamba with conda. We recommend you set the two environment variables that indicate where the workflow environments and data will be located. By default, these will be located in `.snakemake/conda` and `data/` respectively.
```
mamba env create -f environment.yaml
mamba env config vars set SNAKEMAKE_CONDA_PREFIX=<path> SNAKEMAKE_DATA_DIR=<path>
```

Visualize the workflow:
```
snakemake all_results --dag | dot -Tpdf > dag.pdf
snakemake all_results --rulegraph | dot -Tpdf > rulegraph.pdf
```
84 changes: 84 additions & 0 deletions Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os
from functools import partial
from pathlib import Path

import pandas as pd

configfile: "config/config.yaml"
configfile: "config/experiments.yaml"
configfile: "config/extras.yaml"
configfile: "config/tuning.yaml"

# Identifiers of the approaches handled by the workflow; the same names key the
# approach-specific parameter sections in config/config.yaml.
APPROACHES = [
    "unconstrained",
    "loss_constrained",
    "architecture_constrained",
    "offline_constrained",
]

# Variable names handled by the workflow; they mirror, in the same order, the
# `obs:` targets listed in config/config.yaml.
TASKS = [
    "air_temperature",
    "dew_point_temperature",
    "surface_air_pressure",
    "relative_humidity",
    "water_vapor_mixing_ratio",
]

# Number of splits, read from the workflow configuration
# (data_partitioning -> forecast_reference_time -> n_splits).
N_SPLITS = config["data_partitioning"]["forecast_reference_time"]["n_splits"]

# Root directory for the workflow data. The SNAKEMAKE_DATA_DIR environment
# variable takes precedence; when it is unset the relative "data/" directory is
# used. `Path` comes from the explicit `pathlib` import at the top of this file
# rather than from whatever names Snakemake happens to expose.
DATA_DIR = Path(os.getenv("SNAKEMAKE_DATA_DIR", "data/"))


# Rule files pulled in from the rules/ directory. They are included after the
# constants above, with common.smk first — presumably so that the included
# files can rely on those names and on each other; confirm before reordering.
include: "rules/common.smk"
include: "rules/eda.smk"
include: "rules/main.smk"
include: "rules/extras.smk"

# rule all_eda:
# "results/eda/stations/", "results/eda/model"

# Aggregate target for the test partition. It requests:
#   - performance and physical_consistency outputs of the "default" and
#     "time_generalization" experiments,
#   - analysis outputs of "loss_alpha", "data_efficiency" and
#     "time_generalization",
#   - physical_consistency outputs of "data_reduction_consistency".
rule all_results:
    input:
        expand(
            "results/experiments/{experiment}/{partition}/{focus}",
            partition=["test"],
            experiment=["default", "time_generalization"],
            focus=["performance", "physical_consistency"],
        ),
        expand(
            "results/experiments/{experiment}/{partition}/analysis",
            partition=["test"],
            experiment=["loss_alpha", "data_efficiency", "time_generalization"],
        ),
        expand(
            "results/experiments/{experiment}/{partition}/physical_consistency",
            partition=["test"],
            experiment=["data_reduction_consistency"],
        )


# Aggregate target: performance outputs of the "default" and
# "time_generalization" experiments, for both the test and train partitions.
rule all_performance:
    input:
        expand(
            "results/experiments/{experiment}/{partition}/performance",
            partition=["test", "train"],
            experiment=["default", "time_generalization"],
        )


# Aggregate target: physical_consistency outputs on the test partition for the
# "default", "time_generalization" and "data_reduction_consistency" experiments.
rule all_physical_consistency:
    input:
        expand(
            "results/experiments/{experiment}/{partition}/physical_consistency",
            partition=["test"],
            experiment=[
                "default",
                "time_generalization",
                "data_reduction_consistency",
            ],
        )


# Aggregate target: analysis outputs on the test partition for the
# "loss_alpha", "data_efficiency" and "time_generalization" experiments.
rule all_analysis:
    input:
        expand(
            "results/experiments/{experiment}/{partition}/analysis",
            partition=["test"],
            experiment=[
                "loss_alpha",
                "data_efficiency",
                "time_generalization",
            ],
        )



78 changes: 78 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
logging:
level: DEBUG
format: "%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s"
datefmt: "%Y-%m-%d %H:%M:%S"

features:
- coe:air_temperature_ensavg
- coe:dew_point_temperature_ensavg
- coe:dew_point_depression_ensavg
- coe:surface_air_pressure_ensavg
- coe:relative_humidity_ensavg
- coe:water_vapor_mixing_ratio_ensavg
- coe:leadtime
- time:cos_hourofday
- time:sin_hourofday
- time:cos_dayofyear
- time:sin_dayofyear

targets:
- obs:air_temperature
- obs:dew_point_temperature
- obs:surface_air_pressure
- obs:relative_humidity
- obs:water_vapor_mixing_ratio

random_seeds: [100, 200, 300]

devices: 1

data_partitioning:
forecast_reference_time:
p: [0.6, 0.2, 0.2]
n_splits: 4

# data
data.reduction: # 0.1
data.train_season: # ["SON","DJF","MAM"]
data.test_season: # JJA

# net
net.l1: 256
net.l2: 256
net.embedding_size: 6
net.constraint: False
net.out_size: 5

# loss
loss.alpha: 0.0
loss.mask: [True, True, True, True, True]
loss.trainable: True
# loss.log_var_init: [0., 0., 0., 0., 0.]
# loss.log_var_init: [1.5, 1.7, 0.5, 4.5, -0.5]

# fit
# fit.lr_patience: 4
fit.lr: 0.0007
fit.batch_size: 512
fit.max_epochs: 25
fit.patience: 5

# approach-specific parameters
unconstrained:
{}
# fit.lr: 0.005
# fit.batch_size: 1024
architecture_constrained:
net.constraint: True
# fit.lr: 0.005
# fit.batch_size: 1024
loss_constrained:
# fit.lr: 0.005
# fit.batch_size: 1024
loss.alpha: 0.995
offline_constrained:
net.constraint: True
loss.mask: [True, True, True, False, False]
# fit.lr: 0.005
# fit.batch_size: 1024
84 changes: 84 additions & 0 deletions config/experiments.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
experiments:
# generic experiments (do not have their own notebook)
default:
approaches:
- unconstrained
- architecture_constrained
- loss_constrained
- offline_constrained
splits: [0, 1, 2, 3]
seeds: [100, 200, 300]

no_loss_weights:
approaches:
- unconstrained
- architecture_constrained
- loss_constrained
- offline_constrained
param_override:
loss.trainable: False
splits: [0, 1, 2, 3]
seeds: [100, 200, 300]


data_efficiency:
approaches:
- unconstrained
- architecture_constrained
- loss_constrained
- offline_constrained
param_runs:
- { data.reduction: 1.0, net.l1: 256, net.l2: 256, net.embedding_size: 6, fit.lr: 0.007, fit.batch_size: 512}
- { data.reduction: 0.2, net.l1: 64, net.l2: 64, net.embedding_size: 3, fit.lr: 0.004, fit.batch_size: 64}
- { data.reduction: 0.05, net.l1: 64, net.l2: 32, net.embedding_size: 3, fit.lr: 0.0035, fit.batch_size: 64}
- { data.reduction: 0.01, net.l1: 32, net.l2: 32, net.embedding_size: 3, fit.lr: 0.003, fit.batch_size: 64}
param_override:
fit.max_epochs: 50
fit.patience: 5
splits: [0, 1, 2, 3]
seeds: [100, 200, 300]

time_generalization:
approaches:
- unconstrained
- architecture_constrained
- loss_constrained
- offline_constrained
param_override:
data.train_season: ["SON", "DJF", "MAM"]
data.test_season: ["JJA"]
net.l1: 64
net.l2: 64
net.embedding_size: 5
fit.lr: 0.002
fit.batch_size: 256
fit.max_epochs: 15
fit.patience: 5
splits: [0, 1, 2, 3]
seeds: [100, 200, 300]

# parameter-specific experiments (have their own notebooks)
loss_alpha:
approaches:
- loss_constrained
param_grid:
loss.alpha: [0., 0.2, 0.5, 0.8, 0.9, 0.95, 0.99, 0.995, 0.999, 0.9999, 0.99999]
splits: [0]
seeds: [10]

data_reduction_consistency:
approaches:
- unconstrained
- architecture_constrained
- loss_constrained
param_override:
data.reduction: 0.001
net.l1: 32
net.l2: 32
net.embedding_size: 3
fit.lr: 0.001
fit.batch_size: 16
fit.max_epochs: 30
fit.patience: 5
splits: [0, 1, 2, 3]
seeds: [100, 200, 300]
34 changes: 34 additions & 0 deletions config/extras.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
plotting:
rcparams:
savefig.facecolor: "#ffffff"
savefig.dpi: 300
font.size: 13

var_long_names:
air_temperature: "Air temperature"
dew_point_temperature: "Dew point temperature"
surface_air_pressure: "Surface pressure"
relative_humidity: "Relative Humidity"
water_vapor_mixing_ratio: "Mixing ratio"

var_short_names:
air_temperature: "$T$"
dew_point_temperature: "$T_d$"
surface_air_pressure: "$P$"
relative_humidity: "$RH$"
water_vapor_mixing_ratio: "$r$"

var_units:
air_temperature: "°C"
dew_point_temperature: "°C"
surface_air_pressure: "hPa"
relative_humidity: "%"
water_vapor_mixing_ratio: "g kg$^{-1}$"

approach_names:
unconstrained: "Unconstrained"
architecture_constrained: "Architecture constrained"
loss_constrained: "Loss constrained"
offline_constrained: "Offline constrained"

approach_colors: ["#57B1FF", "#D81B60", "#FFC107", "#07846F"]
Loading

0 comments on commit d518207

Please sign in to comment.