From a3adda5313a3f036823324f4af7fad8adcd17e8d Mon Sep 17 00:00:00 2001 From: Wenyi Tang Date: Sat, 30 May 2020 15:37:40 +0800 Subject: [PATCH 01/12] Update TF backend --- Train/par/tensorflow/carn.yaml | 22 ++++---- Train/par/tensorflow/dbpn.yaml | 16 +++--- Train/par/tensorflow/dcscn.yaml | 24 +++++---- Train/par/tensorflow/dncnn.yaml | 12 +++-- Train/par/tensorflow/drcn.yaml | 18 ++++--- Train/par/tensorflow/drrn.yaml | 18 ++++--- Train/par/tensorflow/drsr.yaml | 42 ++++++++-------- Train/par/tensorflow/drsr_v2.yaml | 36 ++++++------- Train/par/tensorflow/duf.yaml | 14 ++++-- Train/par/tensorflow/edsr.yaml | 16 +++--- Train/par/tensorflow/espcn.yaml | 10 ++-- Train/par/tensorflow/ffdnet.yaml | 14 +++--- Train/par/tensorflow/frvsr.yaml | 2 + Train/par/tensorflow/gangp.yaml | 28 +++++------ Train/par/tensorflow/idn.yaml | 22 ++++---- Train/par/tensorflow/lapsrn.yaml | 10 ++-- Train/par/tensorflow/lsgan.yaml | 28 +++++------ Train/par/tensorflow/memnet.yaml | 14 ++++-- Train/par/tensorflow/msrn.yaml | 6 +-- Train/par/tensorflow/nlrn.yaml | 10 ++++ Train/par/tensorflow/ragan.yaml | 28 +++++------ Train/par/tensorflow/ragangp.yaml | 28 +++++------ Train/par/tensorflow/ralsgan.yaml | 28 +++++------ Train/par/tensorflow/rcan.yaml | 16 +++--- Train/par/tensorflow/rdn.yaml | 16 +++--- Train/par/tensorflow/rgan.yaml | 28 +++++------ Train/par/tensorflow/rgangp.yaml | 28 +++++------ Train/par/tensorflow/rlsgan.yaml | 28 +++++------ Train/par/tensorflow/root.yaml | 11 ---- Train/par/tensorflow/sgan.yaml | 28 +++++------ Train/par/tensorflow/srcnn.yaml | 16 +++--- Train/par/tensorflow/srdensenet.yaml | 9 ++-- Train/par/tensorflow/srfeat.yaml | 19 +++---- Train/par/tensorflow/srgan.yaml | 23 +++++---- Train/par/tensorflow/vdsr.yaml | 14 ++++-- Train/par/tensorflow/vespcn.yaml | 18 +++---- Train/par/tensorflow/wgan.yaml | 30 +++++------ Train/par/tensorflow/wgangp.yaml | 30 +++++------ VSR/Backend/TF/Arch/Dense.py | 3 +- VSR/Backend/TF/Arch/Discriminator.py | 2 +- VSR/Backend/TF/Arch/Residual.py | 7 ++- VSR/Backend/TF/Framework/GAN.py | 56 +++++++++++---------- VSR/Backend/TF/Framework/LayersHelper.py | 10 ++-- VSR/Backend/TF/Framework/Motion.py | 3 +- VSR/Backend/TF/Framework/Noise.py | 4 +- VSR/Backend/TF/Framework/SuperResolution.py | 8 +-- VSR/Backend/TF/Framework/Trainer.py | 4 +- VSR/Backend/TF/Models/Carn.py | 5 +- VSR/Backend/TF/Models/Crdn.py | 12 ++--- VSR/Backend/TF/Models/Dbpn.py | 2 +- VSR/Backend/TF/Models/Dcscn.py | 3 +- VSR/Backend/TF/Models/DnCnn.py | 3 +- VSR/Backend/TF/Models/Drcn.py | 2 +- VSR/Backend/TF/Models/Drrn.py | 3 +- VSR/Backend/TF/Models/Drsr.py | 2 +- VSR/Backend/TF/Models/Drsr_v2.py | 4 +- VSR/Backend/TF/Models/Duf.py | 2 +- VSR/Backend/TF/Models/Edsr.py | 3 +- VSR/Backend/TF/Models/Espcn.py | 3 +- VSR/Backend/TF/Models/FFDNet.py | 3 +- VSR/Backend/TF/Models/Gan.py | 2 +- VSR/Backend/TF/Models/Idn.py | 3 +- VSR/Backend/TF/Models/LapSrn.py | 2 +- VSR/Backend/TF/Models/MemNet.py | 3 +- VSR/Backend/TF/Models/Msrn.py | 3 +- VSR/Backend/TF/Models/Nlrn.py | 5 +- VSR/Backend/TF/Models/Rcan.py | 3 +- VSR/Backend/TF/Models/Rdn.py | 3 +- VSR/Backend/TF/Models/SRDenseNet.py | 3 +- VSR/Backend/TF/Models/SRFeat.py | 2 +- VSR/Backend/TF/Models/SrGan.py | 2 +- VSR/Backend/TF/Models/Srcnn.py | 2 +- VSR/Backend/TF/Models/Vdsr.py | 3 +- VSR/Backend/TF/Models/Vespcn.py | 5 +- VSR/Backend/TF/Util.py | 9 ++-- VSR/Backend/TF/__init__.py | 16 ++++++ VSR/DataLoader/Loader.py | 2 +- 77 files changed, 508 insertions(+), 464 deletions(-) create mode 100644 Train/par/tensorflow/nlrn.yaml delete 
mode 100644 Train/par/tensorflow/root.yaml create mode 100644 VSR/Backend/TF/__init__.py diff --git a/Train/par/tensorflow/carn.yaml b/Train/par/tensorflow/carn.yaml index ee16909..6c4c119 100644 --- a/Train/par/tensorflow/carn.yaml +++ b/Train/par/tensorflow/carn.yaml @@ -1,16 +1,16 @@ carn: - recursive: false - n_residual: 3 - n_blocks: 3 - filters: 64 - clip: 10 - weight_decay: 0 - scale: 4 - channel: 3 + recursive: false + n_residual: 3 + n_blocks: 3 + filters: 64 + clip: 10 + weight_decay: 0 + scale: 4 + channel: 3 batch_shape: [4, 16, 16, 3] lr: 1.0e-4 lr_decay: - method: multistep - decay_step: [150000] - decay_rate: 0.1 + method: multistep + decay_step: [150000] + decay_rate: 0.1 diff --git a/Train/par/tensorflow/dbpn.yaml b/Train/par/tensorflow/dbpn.yaml index 09b4cd1..105dfd9 100644 --- a/Train/par/tensorflow/dbpn.yaml +++ b/Train/par/tensorflow/dbpn.yaml @@ -1,13 +1,13 @@ dbpn: - bp_layers: 7 - use_dense: true - scale: 4 - channel: 3 + bp_layers: 7 + use_dense: true + scale: 4 + channel: 3 patch_size: 96 batch: 16 -lr: 1.0e-6 +lr: 1.0e-4 lr_decay: - method: multistep - decay_step: [60000, 160000] - decay_rate: 1 + method: multistep + decay_step: [60000, 160000] + decay_rate: 1 diff --git a/Train/par/tensorflow/dcscn.yaml b/Train/par/tensorflow/dcscn.yaml index d25f617..ad78562 100644 --- a/Train/par/tensorflow/dcscn.yaml +++ b/Train/par/tensorflow/dcscn.yaml @@ -1,11 +1,15 @@ dcscn: - layers: 8 - reconstruction_layers: 1 - filters: 196 - min_filters: 48 - nin_filter: [64, 32] - reconst_filter: 32 - filters_decay_gamma: 1.5 - drop_out: 0.8 - scale: 4 - channel: 3 + layers: 8 + reconstruction_layers: 1 + filters: 196 + min_filters: 48 + nin_filter: [64, 32] + reconst_filter: 32 + filters_decay_gamma: 1.5 + drop_out: 0.8 + scale: 4 + channel: 3 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/dncnn.yaml b/Train/par/tensorflow/dncnn.yaml index 938e3f2..8be6dde 100644 --- a/Train/par/tensorflow/dncnn.yaml +++ b/Train/par/tensorflow/dncnn.yaml @@ -1,5 +1,9 @@ dncnn: - layers: 16 - weight_decay: 1.0e-6 - scale: 1 - channel: 3 + layers: 16 + weight_decay: 1.0e-6 + scale: 1 + channel: 3 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/drcn.yaml b/Train/par/tensorflow/drcn.yaml index 6e3bee3..5b68062 100644 --- a/Train/par/tensorflow/drcn.yaml +++ b/Train/par/tensorflow/drcn.yaml @@ -1,8 +1,12 @@ drcn: - recur: 16 - filters: 256 - alpha: 1 - weight_decay: 1.0e-4 - custom_upsample: false - scale: 4 - channel: 3 \ No newline at end of file + recur: 16 + filters: 256 + alpha: 1 + weight_decay: 1.0e-4 + custom_upsample: false + scale: 4 + channel: 3 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/drrn.yaml b/Train/par/tensorflow/drrn.yaml index fa8815e..520041e 100644 --- a/Train/par/tensorflow/drrn.yaml +++ b/Train/par/tensorflow/drrn.yaml @@ -1,8 +1,12 @@ drrn: - residual_unit: 9 - recursive_block: 1 - grad_clip: 0.01 - weight_decay: 1.0e-6 - custom_upsample: false - scale: 4 - channel: 3 \ No newline at end of file + residual_unit: 9 + recursive_block: 1 + grad_clip: 0.01 + weight_decay: 1.0e-6 + custom_upsample: false + scale: 4 + channel: 3 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/drsr.yaml b/Train/par/tensorflow/drsr.yaml index 7f7f396..1d1a7a9 100644 --- a/Train/par/tensorflow/drsr.yaml +++ b/Train/par/tensorflow/drsr.yaml @@ -1,27 +1,27 @@ --- drsr: - name: 'drsr' - n_cb: 4 - n_crb: 4 - weights: [1, 0.5, 1.0e-6, 1.0e-3] - finetune: 4000 - noise_config: - max: 75.0 - scale: 
1.0 - offset: 0.0 - offset2: 0.06 - penalty: 0.7 - layers: 8 - type: "gaussian" - crf: "../Data/crf.npz" - channel: 3 - scale: 4 - tfrecords: false + name: 'drsr' + n_cb: 4 + n_crb: 4 + weights: [1, 0.5, 1.0e-6, 1.0e-3] + finetune: 4000 + noise_config: + max: 75.0 + scale: 1.0 + offset: 0.0 + offset2: 0.06 + penalty: 0.7 + layers: 8 + type: "gaussian" + crf: "../Data/crf.npz" + channel: 3 + scale: 4 + tfrecords: false patch_size: 64 batch: 16 +lr: 1.0e-4 lr_decay: - method: multistep - decay_step: [] - decay_rate: 1.0 - + method: multistep + decay_step: [] + decay_rate: 1.0 diff --git a/Train/par/tensorflow/drsr_v2.yaml b/Train/par/tensorflow/drsr_v2.yaml index 3317bdf..3fb0b7e 100644 --- a/Train/par/tensorflow/drsr_v2.yaml +++ b/Train/par/tensorflow/drsr_v2.yaml @@ -1,25 +1,25 @@ --- drsr_v2: - weights: [1, 10, 1.0e-5] - # mean_shift: [0, 0, 0] - level: 1 - arch: 'crdb' - auto_shift: true - noise_config: - max: 75.0 - scale: 1.0 - offset: 0.0 - penalty: 0.7 - layers: 8 - crf: "../Data/crf.npz" - valid: false - channel: 3 - scale: 4 + weights: [1, 10, 1.0e-5] + # mean_shift: [0, 0, 0] + level: 1 + arch: 'crdb' + auto_shift: true + noise_config: + max: 75.0 + scale: 1.0 + offset: 0.0 + penalty: 0.7 + layers: 8 + crf: "../Data/crf.npz" + valid: false + channel: 3 + scale: 4 patch_size: 128 batch: 16 lr: 1.0e-4 lr_decay: - method: multistep - decay_step: [150000] - decay_rate: 0.1 + method: multistep + decay_step: [150000] + decay_rate: 0.1 diff --git a/Train/par/tensorflow/duf.yaml b/Train/par/tensorflow/duf.yaml index 2295193..fcb1a4e 100644 --- a/Train/par/tensorflow/duf.yaml +++ b/Train/par/tensorflow/duf.yaml @@ -1,6 +1,10 @@ duf: - scale: 4 - channel: 3 - layers: 16 - filter_size: [5, 5] - depth: 7 + scale: 4 + channel: 3 + layers: 16 + filter_size: [5, 5] + depth: 7 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/edsr.yaml b/Train/par/tensorflow/edsr.yaml index 96191e3..7df9b95 100644 --- a/Train/par/tensorflow/edsr.yaml +++ b/Train/par/tensorflow/edsr.yaml @@ -1,14 +1,14 @@ edsr: - layers: 32 - filters: 256 - clip: 0.1 - scale: 4 - channel: 3 + layers: 32 + filters: 256 + clip: 0.1 + scale: 4 + channel: 3 batch: 16 patch_size: 128 lr: 1.0e-4 lr_decay: - method: multistep - decay_step: [150000] - decay_rate: 0.1 + method: multistep + decay_step: [150000] + decay_rate: 0.1 diff --git a/Train/par/tensorflow/espcn.yaml b/Train/par/tensorflow/espcn.yaml index cc3dcb7..abbe98f 100644 --- a/Train/par/tensorflow/espcn.yaml +++ b/Train/par/tensorflow/espcn.yaml @@ -1,11 +1,11 @@ # espcn 5-3-3 --- espcn: - layers: 3 - filters: [64, 32, 32] - kernel: [5, 3, 3] - scale: 4 - channel: 3 + layers: 3 + filters: [64, 32, 32] + kernel: [5, 3, 3] + scale: 4 + channel: 3 batch_shape: [16, 16, 16, 3] lr: 1.0e-2 diff --git a/Train/par/tensorflow/ffdnet.yaml b/Train/par/tensorflow/ffdnet.yaml index d422dd5..9e594c6 100644 --- a/Train/par/tensorflow/ffdnet.yaml +++ b/Train/par/tensorflow/ffdnet.yaml @@ -1,12 +1,12 @@ --- ffdnet: - sigma: 75 - space_down: 2 - channel: 3 - layers: 15 - training: true - weight_decay: 0 - scale: 1 + sigma: 75 + space_down: 2 + channel: 3 + layers: 15 + training: true + weight_decay: 0 + scale: 1 batch: 32 patch_size: 128 diff --git a/Train/par/tensorflow/frvsr.yaml b/Train/par/tensorflow/frvsr.yaml index d271f8b..1bbca24 100644 --- a/Train/par/tensorflow/frvsr.yaml +++ b/Train/par/tensorflow/frvsr.yaml @@ -6,3 +6,5 @@ frvsr: depth: 10 batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/gangp.yaml b/Train/par/tensorflow/gangp.yaml 
b/Train/par/tensorflow/gangp.yaml
index 52a16f2..25e67d7 100644 --- a/Train/par/tensorflow/gangp.yaml +++ b/Train/par/tensorflow/gangp.yaml @@ -1,19 +1,19 @@ --- gangp: - name: gangp - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - use_bias: true - optimizer: - name: adam - beta1: 0.5 - channel: 3 - scale: 1 # keep scale is necessary though it's unused - + name: gangp + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + use_bias: true + optimizer: + name: adam + beta1: 0.5 + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/Train/par/tensorflow/idn.yaml b/Train/par/tensorflow/idn.yaml index ca52ec6..a60d715 100644 --- a/Train/par/tensorflow/idn.yaml +++ b/Train/par/tensorflow/idn.yaml @@ -1,10 +1,14 @@ idn: - blocks: 4 - filters: 64 - delta: 16 - slice_factor: 4 - leaky_slope: 0.05 - weight_decay: 1.0e-4 - fine_tune_epoch: 200 - scale: 4 - channel: 3 \ No newline at end of file + blocks: 4 + filters: 64 + delta: 16 + slice_factor: 4 + leaky_slope: 0.05 + weight_decay: 1.0e-4 + fine_tune_epoch: 200 + scale: 4 + channel: 3 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/lapsrn.yaml b/Train/par/tensorflow/lapsrn.yaml index fca0f80..ddcb50d 100644 --- a/Train/par/tensorflow/lapsrn.yaml +++ b/Train/par/tensorflow/lapsrn.yaml @@ -1,4 +1,8 @@ lapsrn: - layers: 5 - scale: 4 - channel: 3 \ No newline at end of file + layers: 5 + scale: 4 + channel: 3 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/lsgan.yaml b/Train/par/tensorflow/lsgan.yaml index a9a5450..b75509a 100644 --- a/Train/par/tensorflow/lsgan.yaml +++ b/Train/par/tensorflow/lsgan.yaml @@ -1,19 +1,19 @@ --- lsgan: - name: lsgan - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - use_bias: true - optimizer: - name: adam - beta1: 0.5 - channel: 3 - scale: 1 # keep scale is necessary though it's unused - + name: lsgan + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + use_bias: true + optimizer: + name: adam + beta1: 0.5 + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/Train/par/tensorflow/memnet.yaml b/Train/par/tensorflow/memnet.yaml index 3c4f2ab..35504fa 100644 --- a/Train/par/tensorflow/memnet.yaml +++ b/Train/par/tensorflow/memnet.yaml @@ -1,6 +1,10 @@ memnet: - n_memblock: 6 - n_recur: 6 - filters: 64 - scale: 4 - channel: 3 \ No newline at end of file + n_memblock: 6 + n_recur: 6 + filters: 64 + scale: 4 + channel: 3 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/msrn.yaml b/Train/par/tensorflow/msrn.yaml index 1bd6417..bfc8202 100644 --- a/Train/par/tensorflow/msrn.yaml +++ b/Train/par/tensorflow/msrn.yaml @@ -1,8 +1,8 @@ --- msrn: - n_blocks: 8 - scale: 4 - channel: 3 + n_blocks: 8 + scale: 4 + channel: 3 batch: 4 patch_size: 64 diff --git a/Train/par/tensorflow/nlrn.yaml b/Train/par/tensorflow/nlrn.yaml new file mode 100644 index 0000000..8f17324 --- /dev/null +++ b/Train/par/tensorflow/nlrn.yaml @@ -0,0 +1,10 @@ +nlrn: + scale: 4 + channel: 3 + filters: 128 + recurrents: 12 + clip: 2.5 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/ragan.yaml 
b/Train/par/tensorflow/ragan.yaml index 9603034..6731d27 100644 --- a/Train/par/tensorflow/ragan.yaml +++ b/Train/par/tensorflow/ragan.yaml @@ -1,19 +1,19 @@ --- ragan: - name: ragan - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - use_bias: true - optimizer: - name: adam - beta1: 0.5 - channel: 3 - scale: 1 # keep scale is necessary though it's unused - + name: ragan + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + use_bias: true + optimizer: + name: adam + beta1: 0.5 + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/Train/par/tensorflow/ragangp.yaml b/Train/par/tensorflow/ragangp.yaml index 9974de5..9078840 100644 --- a/Train/par/tensorflow/ragangp.yaml +++ b/Train/par/tensorflow/ragangp.yaml @@ -1,19 +1,19 @@ --- ragangp: - name: ragangp - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - use_bias: true - optimizer: - name: adam - beta1: 0.5 - channel: 3 - scale: 1 # keep scale is necessary though it's unused - + name: ragangp + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + use_bias: true + optimizer: + name: adam + beta1: 0.5 + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/Train/par/tensorflow/ralsgan.yaml b/Train/par/tensorflow/ralsgan.yaml index c3c4ee7..eba555e 100644 --- a/Train/par/tensorflow/ralsgan.yaml +++ b/Train/par/tensorflow/ralsgan.yaml @@ -1,19 +1,19 @@ --- ralsgan: - name: ralsgan - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - use_bias: true - optimizer: - name: adam - beta1: 0.5 - channel: 3 - scale: 1 # keep scale is necessary though it's unused - + name: ralsgan + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + use_bias: true + optimizer: + name: adam + beta1: 0.5 + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/Train/par/tensorflow/rcan.yaml b/Train/par/tensorflow/rcan.yaml index 3c03d56..6dafa94 100644 --- a/Train/par/tensorflow/rcan.yaml +++ b/Train/par/tensorflow/rcan.yaml @@ -1,7 +1,11 @@ rcan: - channel_downscaling: 16 - n_rcab: 4 - n_rg: 3 - filters: 64 - scale: 4 - channel: 3 + channel_downscaling: 16 + n_rcab: 4 + n_rg: 3 + filters: 64 + scale: 4 + channel: 3 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/rdn.yaml b/Train/par/tensorflow/rdn.yaml index 5fe13b7..3e08fd8 100644 --- a/Train/par/tensorflow/rdn.yaml +++ b/Train/par/tensorflow/rdn.yaml @@ -2,9 +2,13 @@ # (hard to train...) 
--- rdn: - rdb_blocks: 8 - rdb_conv: 8 - global_filters: 64 - rdb_filters: 64 - scale: 4 - channel: 3 \ No newline at end of file + rdb_blocks: 8 + rdb_conv: 8 + global_filters: 64 + rdb_filters: 64 + scale: 4 + channel: 3 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/rgan.yaml b/Train/par/tensorflow/rgan.yaml index f4ba2d4..227f97c 100644 --- a/Train/par/tensorflow/rgan.yaml +++ b/Train/par/tensorflow/rgan.yaml @@ -1,19 +1,19 @@ --- rgan: - name: rgan - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - use_bias: true - optimizer: - name: adam - beta1: 0.5 - channel: 3 - scale: 1 # keep scale is necessary though it's unused - + name: rgan + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + use_bias: true + optimizer: + name: adam + beta1: 0.5 + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/Train/par/tensorflow/rgangp.yaml b/Train/par/tensorflow/rgangp.yaml index 537fca6..f120b5d 100644 --- a/Train/par/tensorflow/rgangp.yaml +++ b/Train/par/tensorflow/rgangp.yaml @@ -1,19 +1,19 @@ --- rgangp: - name: rgangp - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - use_bias: true - optimizer: - name: adam - beta1: 0.5 - channel: 3 - scale: 1 # keep scale is necessary though it's unused - + name: rgangp + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + use_bias: true + optimizer: + name: adam + beta1: 0.5 + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/Train/par/tensorflow/rlsgan.yaml b/Train/par/tensorflow/rlsgan.yaml index 295cac5..5e4889d 100644 --- a/Train/par/tensorflow/rlsgan.yaml +++ b/Train/par/tensorflow/rlsgan.yaml @@ -1,19 +1,19 @@ --- rlsgan: - name: rlsgan - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - use_bias: true - optimizer: - name: adam - beta1: 0.5 - channel: 3 - scale: 1 # keep scale is necessary though it's unused - + name: rlsgan + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + use_bias: true + optimizer: + name: adam + beta1: 0.5 + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/Train/par/tensorflow/root.yaml b/Train/par/tensorflow/root.yaml deleted file mode 100644 index a7194fb..0000000 --- a/Train/par/tensorflow/root.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Common tf for all models -# Can also be override in individual yaml ---- -batch: 16 -patch_size: 48 -depth: 1 -lr: 1.0e-4 -lr_decay: - method: multistep - decay_step: [] - decay_rate: 1 diff --git a/Train/par/tensorflow/sgan.yaml b/Train/par/tensorflow/sgan.yaml index 0b97486..28ae6c9 100644 --- a/Train/par/tensorflow/sgan.yaml +++ b/Train/par/tensorflow/sgan.yaml @@ -1,19 +1,19 @@ --- sgan: - name: sgan - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - use_bias: true - optimizer: - name: adam - beta1: 0.5 - channel: 3 - scale: 1 # keep scale is 
necessary though it's unused - + name: sgan + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + use_bias: true + optimizer: + name: adam + beta1: 0.5 + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/Train/par/tensorflow/srcnn.yaml b/Train/par/tensorflow/srcnn.yaml index 351e01e..4211ee1 100644 --- a/Train/par/tensorflow/srcnn.yaml +++ b/Train/par/tensorflow/srcnn.yaml @@ -1,14 +1,14 @@ # srcnn 9-5-5 --- srcnn: - layers: 3 - kernel: - - 9 - - 5 - - 5 - custom_upsample: false - scale: 4 - channel: 1 + layers: 3 + kernel: + - 9 + - 5 + - 5 + custom_upsample: false + scale: 4 + channel: 1 batch: 4 patch_size: 64 diff --git a/Train/par/tensorflow/srdensenet.yaml b/Train/par/tensorflow/srdensenet.yaml index cba4d14..372fd35 100644 --- a/Train/par/tensorflow/srdensenet.yaml +++ b/Train/par/tensorflow/srdensenet.yaml @@ -1,8 +1,9 @@ --- srdensenet: - n_blocks: 8 - scale: 4 - channel: 3 + n_blocks: 8 + scale: 4 + channel: 3 patch_size: 100 -batch: 16 \ No newline at end of file +batch: 16 +lr: 1.0e-4 \ No newline at end of file diff --git a/Train/par/tensorflow/srfeat.yaml b/Train/par/tensorflow/srfeat.yaml index 8a68253..5f8b136 100644 --- a/Train/par/tensorflow/srfeat.yaml +++ b/Train/par/tensorflow/srfeat.yaml @@ -1,13 +1,14 @@ --- srfeat: - glayers: 16 - dlayers: 4 - vgg_layer: 'block5_conv4' - init_epoch: 100 - gan_weight: 1.0e-3 - vgg_weight: 0.1569 # (1 / 12.75)^2 - scale: 4 - channel: 3 + glayers: 16 + dlayers: 4 + vgg_layer: 'block5_conv4' + init_epoch: 100 + gan_weight: 1.0e-3 + vgg_weight: 0.1569 # (1 / 12.75)^2 + scale: 4 + channel: 3 patch_size: 128 -batch: 16 \ No newline at end of file +batch: 16 +lr: 1.0e-4 \ No newline at end of file diff --git a/Train/par/tensorflow/srgan.yaml b/Train/par/tensorflow/srgan.yaml index 99a1f32..1499d96 100644 --- a/Train/par/tensorflow/srgan.yaml +++ b/Train/par/tensorflow/srgan.yaml @@ -1,15 +1,16 @@ --- srgan: - glayers: 16 - dlayers: 4 - vgg_layer: 'block2_conv2' - init_epoch: 100 - vgg_weight: 1.0e-6 - use_vgg: true - mse_weight: 1 - gan_weight: 0.001 - scale: 4 - channel: 3 + glayers: 16 + dlayers: 4 + vgg_layer: 'block2_conv2' + init_epoch: 100 + vgg_weight: 1.0e-6 + use_vgg: true + mse_weight: 1 + gan_weight: 0.001 + scale: 4 + channel: 3 patch_size: 128 -batch: 16 \ No newline at end of file +batch: 16 +lr: 1.0e-4 \ No newline at end of file diff --git a/Train/par/tensorflow/vdsr.yaml b/Train/par/tensorflow/vdsr.yaml index 27ef142..2315d4d 100644 --- a/Train/par/tensorflow/vdsr.yaml +++ b/Train/par/tensorflow/vdsr.yaml @@ -1,7 +1,11 @@ --- vdsr: - layers: 20 - filters: 64 - custom_upsample: false - scale: 4 - channel: 1 \ No newline at end of file + layers: 20 + filters: 64 + custom_upsample: false + scale: 4 + channel: 1 + +batch: 16 +patch_size: 64 +lr: 1.0e-4 diff --git a/Train/par/tensorflow/vespcn.yaml b/Train/par/tensorflow/vespcn.yaml index 9fb9c6c..1d83843 100644 --- a/Train/par/tensorflow/vespcn.yaml +++ b/Train/par/tensorflow/vespcn.yaml @@ -1,17 +1,17 @@ # VESPCN need data depth to be 3, 5, or 7. 
(2n + 1) --- vespcn: - depth: 3 # this `depth` is to inform graph builder - beta: 1 - gamma: 0.01 - scale: 4 - channel: 3 - weight_decay: 0 + depth: 3 # this `depth` is to inform graph builder + beta: 1 + gamma: 0.01 + scale: 4 + channel: 3 + weight_decay: 0 batch: 4 patch_size: 96 depth: 3 # must be same as the model depth lr: 1.0e-4 lr_decay: - method: multistep - decay_step: [] - decay_rate: 1 + method: multistep + decay_step: [] + decay_rate: 1 diff --git a/Train/par/tensorflow/wgan.yaml b/Train/par/tensorflow/wgan.yaml index d56bdab..fcf53be 100644 --- a/Train/par/tensorflow/wgan.yaml +++ b/Train/par/tensorflow/wgan.yaml @@ -1,20 +1,20 @@ --- wgan: - name: wgan - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - nd_iter: 5 - use_bias: true - optimizer: - name: rmsprop - - channel: 3 - scale: 1 # keep scale is necessary though it's unused - + name: wgan + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + nd_iter: 5 + use_bias: true + optimizer: + name: rmsprop + + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/Train/par/tensorflow/wgangp.yaml b/Train/par/tensorflow/wgangp.yaml index ec2279b..392b2e1 100644 --- a/Train/par/tensorflow/wgangp.yaml +++ b/Train/par/tensorflow/wgangp.yaml @@ -1,20 +1,20 @@ --- wgangp: - name: wgangp - patch_size: 32 - z_dim: 128 - init_filter: 512 - linear: true # dense layer at the entry - norm_g: bn # null, bn or sn - norm_d: sn - use_bias: true - nd_iter: 5 - optimizer: - name: adam - beta1: 0.5 - channel: 3 - scale: 1 # keep scale is necessary though it's unused - + name: wgangp + patch_size: 32 + z_dim: 128 + init_filter: 512 + linear: true # dense layer at the entry + norm_g: bn # null, bn or sn + norm_d: sn + use_bias: true + nd_iter: 5 + optimizer: + name: adam + beta1: 0.5 + channel: 3 + scale: 1 # keep scale is necessary though it's unused + batch: 64 test_batch: 100 validate_every_n_epoch: 5 diff --git a/VSR/Backend/TF/Arch/Dense.py b/VSR/Backend/TF/Arch/Dense.py index 395985d..84d599a 100644 --- a/VSR/Backend/TF/Arch/Dense.py +++ b/VSR/Backend/TF/Arch/Dense.py @@ -7,8 +7,7 @@ Architectures of common dense blocks used in SR researches """ -import tensorflow as tf - +from .. import tf from ..Framework.LayersHelper import Layers diff --git a/VSR/Backend/TF/Arch/Discriminator.py b/VSR/Backend/TF/Arch/Discriminator.py index 3823a4e..d9911c7 100644 --- a/VSR/Backend/TF/Arch/Discriminator.py +++ b/VSR/Backend/TF/Arch/Discriminator.py @@ -7,8 +7,8 @@ Architectures of common discriminator """ import numpy as np -import tensorflow as tf +from .. import tf from ..Framework.LayersHelper import Layers diff --git a/VSR/Backend/TF/Arch/Residual.py b/VSR/Backend/TF/Arch/Residual.py index 61e2ec7..f241c64 100644 --- a/VSR/Backend/TF/Arch/Residual.py +++ b/VSR/Backend/TF/Arch/Residual.py @@ -6,9 +6,8 @@ Architectures of common residual blocks used in SR researches """ -import tensorflow as tf - from VSR.Util import to_list +from .. 
import tf from ..Framework.LayersHelper import Layers @@ -81,8 +80,8 @@ def cascade_block(layers: Layers, inputs, depth=4, x = layers.resblock(inputs, f, k, activation=act) feat.append(x) inputs = layers.conv2d( - tf.concat(feat, axis=-1), f, 1, - kernel_initializer='he_uniform') + tf.concat(feat, axis=-1), f, 1, + kernel_initializer='he_uniform') inputs = layers.conv2d(inputs, f, k) return inputs diff --git a/VSR/Backend/TF/Framework/GAN.py b/VSR/Backend/TF/Framework/GAN.py index 25e4255..9134029 100644 --- a/VSR/Backend/TF/Framework/GAN.py +++ b/VSR/Backend/TF/Framework/GAN.py @@ -1,5 +1,5 @@ """ -Copyright: Wenyi Tang 2017-2018 +Copyright: Wenyi Tang 2017-2020 Author: Wenyi Tang Email: wenyi.tang@intel.com Created Date: July 20th 2018 @@ -11,10 +11,14 @@ from functools import partial import numpy as np -import tensorflow as tf +from .. import tf, tfc, ver_major, ver_minor _INCEPTION_BATCH = 50 -_TFGAN = tf.contrib.gan.eval +if ver_major == 1 and ver_minor <= 14: + _TFGAN = tfc.gan.eval +else: + raise ImportError("tfc.gan was removed since 1.15.0. " + "Please downgrade to 1.14.0 or use pytorch backend.") def _preprocess_for_inception(images): @@ -36,9 +40,9 @@ def _preprocess_for_inception(images): images = tf.identity(images) preprocessed_images = tf.map_fn( - fn=_TFGAN.preprocess_image, - elems=images, - back_prop=False) + fn=_TFGAN.preprocess_image, + elems=images, + back_prop=False) return preprocessed_images @@ -51,7 +55,7 @@ def _run_inception(images, layer_name, inception_graph): def fid_score(real_image, gen_image, num_batches=None): - """FID function from tf.contrib + """FID function from tfc Args: real_image: must be 4-D tensor, ranges from [0, 255] @@ -67,9 +71,9 @@ def fid_score(real_image, gen_image, num_batches=None): if not num_batches: num_batches = (batches + _INCEPTION_BATCH - 1) // _INCEPTION_BATCH graph = _TFGAN.get_graph_def_from_url_tarball( - 'http://download.tensorflow.org/models/frozen_inception_v1_2015_12_05.tar.gz', - 'inceptionv1_for_inception_score.pb', - '/tmp/frozen_inception_v1_2015_12_05.tar.gz') + 'http://download.tensorflow.org/models/frozen_inception_v1_2015_12_05.tar.gz', + 'inceptionv1_for_inception_score.pb', + '/tmp/frozen_inception_v1_2015_12_05.tar.gz') # make tensor batches real_ph = tf.placeholder(tf.float32, [_INCEPTION_BATCH, *real_image.shape[1:]]) @@ -84,22 +88,22 @@ def fid_score(real_image, gen_image, num_batches=None): gen_image = np.split(gen_image, num_batches) for i in range(num_batches): r, g = sess.run( - [real_features, gen_features], - feed_dict={real_ph: real_image[i], gen_ph: gen_image[i]}) + [real_features, gen_features], + feed_dict={real_ph: real_image[i], gen_ph: gen_image[i]}) real_feature_np.append(r) gen_feature_np.append(g) real_feature_np = np.concatenate(real_feature_np) gen_feature_np = np.concatenate(gen_feature_np) fid_tensor = _TFGAN.frechet_classifier_distance( - classifier_fn=tf.identity, - real_images=real_feature_np, - generated_images=gen_feature_np, - num_batches=num_batches) + classifier_fn=tf.identity, + real_images=real_feature_np, + generated_images=gen_feature_np, + num_batches=num_batches) return fid_tensor def inception_score(images, num_batches=None): - """IS function from tf.contrib + """IS function from tfc Args: images: must be 4-D tensor, ranges from [0, 255] @@ -110,15 +114,15 @@ def inception_score(images, num_batches=None): if not num_batches: num_batches = (batches + _INCEPTION_BATCH - 1) // _INCEPTION_BATCH graph = _TFGAN.get_graph_def_from_url_tarball( - 
'http://download.tensorflow.org/models/frozen_inception_v1_2015_12_05.tar.gz', - 'inceptionv1_for_inception_score.pb', - '/tmp/frozen_inception_v1_2015_12_05.tar.gz') + 'http://download.tensorflow.org/models/frozen_inception_v1_2015_12_05.tar.gz', + 'inceptionv1_for_inception_score.pb', + '/tmp/frozen_inception_v1_2015_12_05.tar.gz') return _TFGAN.classifier_score( - images=images, - classifier_fn=partial(_run_inception, - layer_name='logits:0', - inception_graph=graph), - num_batches=num_batches) + images=images, + classifier_fn=partial(_run_inception, + layer_name='logits:0', + inception_graph=graph), + num_batches=num_batches) def loss_bce_gan(y_real, y_fake): @@ -168,7 +172,7 @@ def gradient_penalty(y_true, y_pred, graph_fn, lamb=10): interp = y_true + alpha * diff gradients = tf.gradients(graph_fn(interp), [interp])[0] slopes = tf.sqrt(1e-4 + tf.reduce_sum( - tf.square(gradients), reduction_indices=[1, 2, 3])) + tf.square(gradients), reduction_indices=[1, 2, 3])) gp = tf.reduce_mean(tf.square(slopes - 1.)) return lamb * gp diff --git a/VSR/Backend/TF/Framework/LayersHelper.py b/VSR/Backend/TF/Framework/LayersHelper.py index abf6f5b..26a5f85 100644 --- a/VSR/Backend/TF/Framework/LayersHelper.py +++ b/VSR/Backend/TF/Framework/LayersHelper.py @@ -1,14 +1,14 @@ """ -Copyright: Wenyi Tang 2017-2018 +Copyright: Wenyi Tang 2017-2020 Author: Wenyi Tang Email: wenyi.tang@intel.com Created Date: Sep 5th 2018 commonly used layers helper """ -import tensorflow as tf from VSR.Util import to_list +from .. import tf, tfc from ..Util import ( SpectralNorm, TorchInitializer, pixel_shift, pop_dict_wo_keyerror, prelu ) @@ -26,21 +26,21 @@ def batch_norm(self, x, training, decay=0.9, epsilon=1e-5, name=None): def instance_norm(self, x, trainable=True, name=None, reuse=None): with tf.variable_scope(name, 'InstanceNorm', reuse=reuse): - return tf.contrib.layers.instance_norm( + return tfc.layers.instance_norm( x, trainable=trainable, variables_collections=[tf.GraphKeys.GLOBAL_VARIABLES]) def layer_norm(self, x, trainable=True, name=None, reuse=None): with tf.variable_scope(name, 'LayerNorm', reuse=reuse): - return tf.contrib.layers.layer_norm( + return tfc.layers.layer_norm( x, trainable=trainable, variables_collections=[tf.GraphKeys.GLOBAL_VARIABLES]) def group_norm(self, x, group, axis, trainable=True, name=None, reuse=None): with tf.variable_scope(name, 'GroupNorm', reuse=reuse): - return tf.contrib.layers.group_norm( + return tfc.layers.group_norm( x, group, axis, trainable=trainable, variables_collections=[tf.GraphKeys.GLOBAL_VARIABLES]) diff --git a/VSR/Backend/TF/Framework/Motion.py b/VSR/Backend/TF/Framework/Motion.py index 79e02b5..a41521d 100644 --- a/VSR/Backend/TF/Framework/Motion.py +++ b/VSR/Backend/TF/Framework/Motion.py @@ -7,7 +7,8 @@ Utility for motion compensation """ import numpy as np -import tensorflow as tf + +from .. import tf def _grid_norm(width, height, bounds=(-1.0, 1.0)): diff --git a/VSR/Backend/TF/Framework/Noise.py b/VSR/Backend/TF/Framework/Noise.py index 0293283..347788a 100644 --- a/VSR/Backend/TF/Framework/Noise.py +++ b/VSR/Backend/TF/Framework/Noise.py @@ -1,5 +1,5 @@ """ -Copyright: Wenyi Tang 2017-2019 +Copyright: Wenyi Tang 2017-2020 Author: Wenyi Tang Email: wenyi.tang@intel.com Created Date: Dec 25th 2018 @@ -9,7 +9,7 @@ [1] https://arxiv.org/abs/1807.04686 """ -import tensorflow as tf +from .. 
import tf def tf_camera_response_function(inputs, crf_table, max_val=1): diff --git a/VSR/Backend/TF/Framework/SuperResolution.py b/VSR/Backend/TF/Framework/SuperResolution.py index 21ce71c..312637a 100644 --- a/VSR/Backend/TF/Framework/SuperResolution.py +++ b/VSR/Backend/TF/Framework/SuperResolution.py @@ -1,5 +1,5 @@ """ -Copyright: Wenyi Tang 2017-2018 +Copyright: Wenyi Tang 2017-2020 Author: Wenyi Tang Email: wenyi.tang@intel.com Created Date: May 9th 2018 @@ -10,14 +10,14 @@ import logging from pathlib import Path -import tensorflow as tf - from VSR.Util import to_list from .LayersHelper import Layers from .Trainer import VSR +from .. import tf LOG = logging.getLogger('VSR.Framework.TF') + class SuperResolution(Layers): """A utility class helps for building SR architectures easily @@ -130,7 +130,7 @@ def build_graph(self): self.inputs.append( tf.placeholder(tf.uint8, shape=[None, None, None, None], name='input/lr')) - inputs_f = tf.to_float(self.inputs[0]) + inputs_f = tf.cast(self.inputs[0], dtype=tf.float32) # separate additional channels (e.g. alpha channel) self.inputs_preproc.append(inputs_f[..., self.channel:]) self.inputs_preproc.append(inputs_f[..., :self.channel]) diff --git a/VSR/Backend/TF/Framework/Trainer.py b/VSR/Backend/TF/Framework/Trainer.py index 098f1a5..a0a4fc6 100644 --- a/VSR/Backend/TF/Framework/Trainer.py +++ b/VSR/Backend/TF/Framework/Trainer.py @@ -1,5 +1,5 @@ """ -Copyright: Wenyi Tang 2017-2018 +Copyright: Wenyi Tang 2017-2020 Author: Wenyi Tang Email: wenyi.tang@intel.com Created Date: Oct 15th 2018 @@ -13,10 +13,10 @@ from pathlib import Path import numpy as np -import tensorflow as tf import tqdm from VSR.Util import Config, to_list +from .. import tf LOG = logging.getLogger('VSR.Framework') diff --git a/VSR/Backend/TF/Models/Carn.py b/VSR/Backend/TF/Models/Carn.py index 80e4d1c..e4504b6 100644 --- a/VSR/Backend/TF/Models/Carn.py +++ b/VSR/Backend/TF/Models/Carn.py @@ -8,8 +8,7 @@ See https://arxiv.org/abs/1803.08664 """ -import tensorflow as tf - +from .. import tf, tfc from ..Framework.SuperResolution import SuperResolution @@ -97,7 +96,7 @@ def build_loss(self): with tf.control_dependencies(update_op): opt = tf.train.AdamOptimizer(self.learning_rate) var_n_grad = opt.compute_gradients(loss) - grad_clip = tf.contrib.training.clip_gradient_norms( + grad_clip = tfc.training.clip_gradient_norms( var_n_grad, self.clip) opt = opt.apply_gradients(grad_clip, self.global_steps) self.loss.append(opt) diff --git a/VSR/Backend/TF/Models/Crdn.py b/VSR/Backend/TF/Models/Crdn.py index 89fccfb..bf5c42a 100644 --- a/VSR/Backend/TF/Models/Crdn.py +++ b/VSR/Backend/TF/Models/Crdn.py @@ -7,13 +7,7 @@ Cascaded Residual Dense Network (NTIRE 2019) """ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/3 下午8:28 - -import tensorflow as tf - +from .. 
import tf from ..Arch.Residual import cascade_rdn from ..Framework.SuperResolution import SuperResolution from ..Util import clip_image @@ -85,9 +79,9 @@ def build_loss(self): self.train_metric['l1'] = l1 self.metrics['psnr'] = tf.reduce_mean( - tf.image.psnr(self.label[-1], self.outputs[-1], max_val=255)) + tf.image.psnr(self.label[-1], self.outputs[-1], max_val=255)) self.metrics['ssim'] = tf.reduce_mean( - tf.image.ssim(self.label[-1], self.outputs[-1], max_val=255)) + tf.image.ssim(self.label[-1], self.outputs[-1], max_val=255)) def build_summary(self): super(CRDN, self).build_summary() diff --git a/VSR/Backend/TF/Models/Dbpn.py b/VSR/Backend/TF/Models/Dbpn.py index 2dc7905..c3c478d 100644 --- a/VSR/Backend/TF/Models/Dbpn.py +++ b/VSR/Backend/TF/Models/Dbpn.py @@ -10,8 +10,8 @@ """ import numpy as np -import tensorflow as tf +from .. import tf from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/Dcscn.py b/VSR/Backend/TF/Models/Dcscn.py index 6772236..bb2f42f 100644 --- a/VSR/Backend/TF/Models/Dcscn.py +++ b/VSR/Backend/TF/Models/Dcscn.py @@ -9,8 +9,7 @@ Deep CNN with Skip Connection and Network in Network See https://arxiv.org/abs/1707.05425 """ -import tensorflow as tf - +from .. import tf from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/DnCnn.py b/VSR/Backend/TF/Models/DnCnn.py index f04342b..86e2c39 100644 --- a/VSR/Backend/TF/Models/DnCnn.py +++ b/VSR/Backend/TF/Models/DnCnn.py @@ -8,8 +8,7 @@ Implementing Feed-forward Denoising Convolutional Neural Network See http://ieeexplore.ieee.org/document/7839189/ """ -import tensorflow as tf - +from .. import tf from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/Drcn.py b/VSR/Backend/TF/Models/Drcn.py index 0cb58a4..8010b75 100644 --- a/VSR/Backend/TF/Models/Drcn.py +++ b/VSR/Backend/TF/Models/Drcn.py @@ -10,8 +10,8 @@ """ import numpy as np -import tensorflow as tf +from .. import tf from ..Framework.SuperResolution import SuperResolution from ..Util import bicubic_rescale diff --git a/VSR/Backend/TF/Models/Drrn.py b/VSR/Backend/TF/Models/Drrn.py index 2f05774..0499322 100644 --- a/VSR/Backend/TF/Models/Drrn.py +++ b/VSR/Backend/TF/Models/Drrn.py @@ -11,8 +11,7 @@ import logging -import tensorflow as tf - +from .. import tf from ..Framework.SuperResolution import SuperResolution from ..Util import bicubic_rescale diff --git a/VSR/Backend/TF/Models/Drsr.py b/VSR/Backend/TF/Models/Drsr.py index 23c0412..0200f59 100644 --- a/VSR/Backend/TF/Models/Drsr.py +++ b/VSR/Backend/TF/Models/Drsr.py @@ -7,9 +7,9 @@ import logging import numpy as np -import tensorflow as tf from VSR.Util import Config +from .. import tf from ..Framework import Noise, Trainer from ..Framework.SuperResolution import SuperResolution from ..Util import summary_tensor_image diff --git a/VSR/Backend/TF/Models/Drsr_v2.py b/VSR/Backend/TF/Models/Drsr_v2.py index 7387c27..2d01f9a 100644 --- a/VSR/Backend/TF/Models/Drsr_v2.py +++ b/VSR/Backend/TF/Models/Drsr_v2.py @@ -3,13 +3,13 @@ # Email: wenyi.tang@intel.com # Update Date: 2019 - 2 - 28 -from functools import partial import logging +from functools import partial import numpy as np -import tensorflow as tf from VSR.Util import Config, to_list +from .. 
import tf from ..Arch.Residual import cascade_rdn from ..Framework import Noise from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/Duf.py b/VSR/Backend/TF/Models/Duf.py index 4277fa1..5688ef8 100644 --- a/VSR/Backend/TF/Models/Duf.py +++ b/VSR/Backend/TF/Models/Duf.py @@ -9,9 +9,9 @@ """ import numpy as np -import tensorflow as tf from VSR.Util import to_list +from .. import tf from ..Framework.SuperResolution import SuperResolution from ..Util import pixel_shift diff --git a/VSR/Backend/TF/Models/Edsr.py b/VSR/Backend/TF/Models/Edsr.py index 368608d..2adf7d1 100644 --- a/VSR/Backend/TF/Models/Edsr.py +++ b/VSR/Backend/TF/Models/Edsr.py @@ -9,8 +9,7 @@ See https://arxiv.org/abs/1707.02921 """ -import tensorflow as tf - +from .. import tf from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/Espcn.py b/VSR/Backend/TF/Models/Espcn.py index e2284de..059c0ee 100644 --- a/VSR/Backend/TF/Models/Espcn.py +++ b/VSR/Backend/TF/Models/Espcn.py @@ -8,9 +8,8 @@ Efficient Sub-Pixel Convolutional Neural Network Ref https://arxiv.org/abs/1609.05158 """ -import tensorflow as tf - from VSR.Util import to_list +from .. import tf from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/FFDNet.py b/VSR/Backend/TF/Models/FFDNet.py index c1f35f3..ca468fc 100644 --- a/VSR/Backend/TF/Models/FFDNet.py +++ b/VSR/Backend/TF/Models/FFDNet.py @@ -3,8 +3,7 @@ # Email: wenyi.tang@intel.com # Update Date: 2019/4/25 下午2:13 -import tensorflow as tf - +from .. import tf from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/Gan.py b/VSR/Backend/TF/Models/Gan.py index 0a80d5c..c607a16 100644 --- a/VSR/Backend/TF/Models/Gan.py +++ b/VSR/Backend/TF/Models/Gan.py @@ -8,10 +8,10 @@ """ import numpy as np -import tensorflow as tf import tqdm from VSR.Util import Config, to_list +from .. import tf from ..Arch import Discriminator from ..Framework.GAN import ( gradient_penalty, inception_score, loss_bce_gan, diff --git a/VSR/Backend/TF/Models/Idn.py b/VSR/Backend/TF/Models/Idn.py index c3c08a5..1fed16e 100644 --- a/VSR/Backend/TF/Models/Idn.py +++ b/VSR/Backend/TF/Models/Idn.py @@ -9,8 +9,7 @@ See https://arxiv.org/abs/1803.09454 """ -import tensorflow as tf - +from .. import tf from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/LapSrn.py b/VSR/Backend/TF/Models/LapSrn.py index ebbd1cd..0f35940 100644 --- a/VSR/Backend/TF/Models/LapSrn.py +++ b/VSR/Backend/TF/Models/LapSrn.py @@ -9,9 +9,9 @@ Ref http://vllab.ucmerced.edu/wlai24/LapSRN """ import numpy as np -import tensorflow as tf from VSR.Util import to_list +from .. import tf from ..Framework.SuperResolution import SuperResolution from ..Util import bicubic_rescale diff --git a/VSR/Backend/TF/Models/MemNet.py b/VSR/Backend/TF/Models/MemNet.py index 60cbef7..3ac1e6a 100644 --- a/VSR/Backend/TF/Models/MemNet.py +++ b/VSR/Backend/TF/Models/MemNet.py @@ -8,9 +8,8 @@ See https://arxiv.org/abs/1708.02209 """ -import tensorflow as tf - from VSR.Util import to_list +from .. 
import tf from ..Framework.SuperResolution import SuperResolution from ..Util import bicubic_rescale diff --git a/VSR/Backend/TF/Models/Msrn.py b/VSR/Backend/TF/Models/Msrn.py index 9d342eb..ad37b94 100644 --- a/VSR/Backend/TF/Models/Msrn.py +++ b/VSR/Backend/TF/Models/Msrn.py @@ -8,8 +8,7 @@ See http://openaccess.thecvf.com/content_ECCV_2018/papers/Juncheng_Li_Multi-scale_Residual_Network_ECCV_2018_paper.pdf """ -import tensorflow as tf - +from .. import tf from ..Arch.Residual import msrb from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/Nlrn.py b/VSR/Backend/TF/Models/Nlrn.py index 659d39d..f31205d 100644 --- a/VSR/Backend/TF/Models/Nlrn.py +++ b/VSR/Backend/TF/Models/Nlrn.py @@ -10,8 +10,7 @@ import logging -import tensorflow as tf - +from .. import tf, tfc from ..Arch.Residual import non_local from ..Framework.SuperResolution import SuperResolution @@ -75,7 +74,7 @@ def build_loss(self): with tf.control_dependencies(update_ops): opt = tf.train.AdadeltaOptimizer(self.learning_rate) grad = opt.compute_gradients(mse) - grad_clip = tf.contrib.training.clip_gradient_norms( + grad_clip = tfc.training.clip_gradient_norms( grad, self.clip) op = opt.apply_gradients(grad_clip, self.global_steps) self.loss.append(op) diff --git a/VSR/Backend/TF/Models/Rcan.py b/VSR/Backend/TF/Models/Rcan.py index 06b35c1..946ad63 100644 --- a/VSR/Backend/TF/Models/Rcan.py +++ b/VSR/Backend/TF/Models/Rcan.py @@ -8,8 +8,7 @@ See https://arxiv.org/abs/1807.02758 """ -import tensorflow as tf - +from .. import tf from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/Rdn.py b/VSR/Backend/TF/Models/Rdn.py index 1cc3f56..9015d26 100644 --- a/VSR/Backend/TF/Models/Rdn.py +++ b/VSR/Backend/TF/Models/Rdn.py @@ -9,8 +9,7 @@ See https://arxiv.org/abs/1802.08797 """ -import tensorflow as tf - +from .. import tf from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/SRDenseNet.py b/VSR/Backend/TF/Models/SRDenseNet.py index 0628651..24e96ee 100644 --- a/VSR/Backend/TF/Models/SRDenseNet.py +++ b/VSR/Backend/TF/Models/SRDenseNet.py @@ -8,8 +8,7 @@ See http://openaccess.thecvf.com/content_ICCV_2017/papers/Tong_Image_Super-Resolution_Using_ICCV_2017_paper.pdf """ -import tensorflow as tf - +from .. import tf from ..Arch import Dense from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/SRFeat.py b/VSR/Backend/TF/Models/SRFeat.py index a9ea0a3..4d90d17 100644 --- a/VSR/Backend/TF/Models/SRFeat.py +++ b/VSR/Backend/TF/Models/SRFeat.py @@ -7,8 +7,8 @@ Single Image Super-Resolution with Feature Discrimination (ECCV 2018) See http://openaccess.thecvf.com/content_ECCV_2018/papers/Seong-Jin_Park_SRFeat_Single_Image_ECCV_2018_paper.pdf """ -import tensorflow as tf +from .. import tf from ..Arch import Discriminator from ..Framework.GAN import loss_bce_gan from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/SrGan.py b/VSR/Backend/TF/Models/SrGan.py index 30fd1a4..9c062e5 100644 --- a/VSR/Backend/TF/Models/SrGan.py +++ b/VSR/Backend/TF/Models/SrGan.py @@ -8,8 +8,8 @@ SRGAN implementation (CVPR 2017) See https://arxiv.org/abs/1609.04802 """ -import tensorflow as tf +from .. 
import tf from ..Arch import Discriminator from ..Framework.GAN import loss_bce_gan from ..Framework.SuperResolution import SuperResolution diff --git a/VSR/Backend/TF/Models/Srcnn.py b/VSR/Backend/TF/Models/Srcnn.py index 5bad475..48088d9 100644 --- a/VSR/Backend/TF/Models/Srcnn.py +++ b/VSR/Backend/TF/Models/Srcnn.py @@ -8,9 +8,9 @@ SRCNN mainly for framework tests (ECCV 2014) Ref https://arxiv.org/abs/1501.00092 """ -import tensorflow as tf from VSR.Util import to_list +from .. import tf from ..Framework.SuperResolution import SuperResolution from ..Util import bicubic_rescale diff --git a/VSR/Backend/TF/Models/Vdsr.py b/VSR/Backend/TF/Models/Vdsr.py index 4b9c576..fe1b52f 100644 --- a/VSR/Backend/TF/Models/Vdsr.py +++ b/VSR/Backend/TF/Models/Vdsr.py @@ -9,8 +9,7 @@ See https://arxiv.org/abs/1511.04587 """ -import tensorflow as tf - +from .. import tf from ..Framework.SuperResolution import SuperResolution from ..Util import bicubic_rescale diff --git a/VSR/Backend/TF/Models/Vespcn.py b/VSR/Backend/TF/Models/Vespcn.py index 514a552..3f1e29e 100644 --- a/VSR/Backend/TF/Models/Vespcn.py +++ b/VSR/Backend/TF/Models/Vespcn.py @@ -8,11 +8,10 @@ See https://arxiv.org/abs/1611.05250 """ -import tensorflow as tf - +from .. import tf from ..Framework.Motion import viz_flow, warp from ..Framework.SuperResolution import SuperResolution -from ..Util import pixel_shift, pad_if_divide +from ..Util import pad_if_divide, pixel_shift class VESPCN(SuperResolution): diff --git a/VSR/Backend/TF/Util.py b/VSR/Backend/TF/Util.py index fa69bf8..dafdea6 100644 --- a/VSR/Backend/TF/Util.py +++ b/VSR/Backend/TF/Util.py @@ -1,5 +1,5 @@ """ -Copyright: Wenyi Tang 2017-2018 +Copyright: Wenyi Tang 2017-2020 Author: Wenyi Tang Email: wenyi.tang@intel.com Created Date: May 8th 2018 @@ -10,10 +10,9 @@ import logging -import tensorflow as tf - from VSR.Util import to_list from VSR.Util.Math import weights_downsample, weights_upsample +from . import tf LOG = logging.getLogger('VSR.TF.Util') @@ -91,8 +90,8 @@ def bicubic_rescale(img, scale): with tf.name_scope('Bicubic'): shape = tf.shape(img) scale = to_list(scale, 2) - shape_enlarge = tf.to_float(shape) * [1, *scale, 1] - shape_enlarge = tf.to_int32(shape_enlarge) + shape_enlarge = tf.cast(shape, dtype=tf.float32) * [1, *scale, 1] + shape_enlarge = tf.cast(shape_enlarge, dtype=tf.int32) return tf.image.resize_bicubic(img, shape_enlarge[1:3], False) diff --git a/VSR/Backend/TF/__init__.py b/VSR/Backend/TF/__init__.py new file mode 100644 index 0000000..e2be0f3 --- /dev/null +++ b/VSR/Backend/TF/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2017-2020 Wenyi Tang. 
+# Author: Wenyi Tang
+# Email: wenyitang@outlook.com
+# Update: 2020 - 5 - 30
+
+import tensorflow as tf
+
+ver_major, ver_minor, _ = [int(s) for s in tf.__version__.split('.')]
+if ver_major >= 2:
+  import tensorflow.compat.v1 as tf
+
+  tf.disable_v2_behavior()
+else:
+  tfc = tf.contrib
+  if ver_minor >= 15:
+    import tensorflow.compat.v1 as tf
diff --git a/VSR/DataLoader/Loader.py b/VSR/DataLoader/Loader.py
index 1efeceb..b496e5d 100644
--- a/VSR/DataLoader/Loader.py
+++ b/VSR/DataLoader/Loader.py
@@ -187,7 +187,7 @@ def __init__(self, hr_data, lr_data=None, scale=None, extra_data: dict = None,
       assert isinstance(lr_data, Container)
     if lr_data is not None and hr_data is not None:
       assert len(hr_data) == len(lr_data), \
-        f"Length of HR and LR data mis-match: {len(lr_data)} != {len(lr_data)}"
+        f"Length of HR and LR data mismatch: {len(hr_data)} != {len(lr_data)}"
     else:
       hr_data = hr_data or lr_data
       lr_data = lr_data or hr_data

From 6d38707fedcf2935053b61c430ee8e86a261e63a Mon Sep 17 00:00:00 2001
From: Wenyi Tang
Date: Sun, 31 May 2020 23:51:11 +0800
Subject: [PATCH 02/12] Add support for tensorflow 2.0

---
 CHANGELOG.md                               |   7 +-
 Docs/HowTo/Change backend.md               |   3 +-
 Train/par/keras/srcnn.yaml                 |  18 ++
 VSR/Backend/Keras/Framework/Environment.py | 123 +++++++++++
 VSR/Backend/Keras/Framework/Trainer.py     | 224 +++++++++++++++++++++
 VSR/Backend/Keras/Framework/__init__.py    |   5 +
 VSR/Backend/Keras/Models/Model.py          | 135 +++++++++++++
 VSR/Backend/Keras/Models/Srcnn.py          |  65 ++++++
 VSR/Backend/Keras/Models/__init__.py       |  24 +++
 VSR/Backend/Keras/__init__.py              |  23 +++
 VSR/Backend/TF/Framework/LayersHelper.py   |   5 +-
 VSR/Backend/__init__.py                    |  14 +-
 VSR/Model/__init__.py                      |   8 +-
 prepare_data.py                            |   3 +-
 14 files changed, 644 insertions(+), 13 deletions(-)
 create mode 100644 Train/par/keras/srcnn.yaml
 create mode 100644 VSR/Backend/Keras/Framework/Environment.py
 create mode 100644 VSR/Backend/Keras/Framework/Trainer.py
 create mode 100644 VSR/Backend/Keras/Framework/__init__.py
 create mode 100644 VSR/Backend/Keras/Models/Model.py
 create mode 100644 VSR/Backend/Keras/Models/Srcnn.py
 create mode 100644 VSR/Backend/Keras/Models/__init__.py
 create mode 100644 VSR/Backend/Keras/__init__.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b618887..1ac9d84 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,9 @@
-1.0.5
+1.0.6
+
+## 1.0.6
+## 2020-06
+- Update TF backend
+- Add support for tensorflow 2.0 (both legacy and eager modes)
 
 ## 1.0.5
 ## 2020-05
diff --git a/Docs/HowTo/Change backend.md b/Docs/HowTo/Change backend.md
index b9510d9..15bdaa8 100644
--- a/Docs/HowTo/Change backend.md
+++ b/Docs/HowTo/Change backend.md
@@ -5,7 +5,8 @@ for some of models.
 Edit config file `~/.vsr/config.yml`,
 If you'd like to change to tensorflow: (create one if not exist)
 ```yaml
-# the backend could be 'tensorflow', 'tensorflow2', 'pytorch'
+# the backend could be 'tensorflow', 'keras', 'pytorch'
+# `keras` selects the tensorflow v2.0 backend
 backend: tensorflow
 # the verbose could be 'error', 'warning', 'info', 'debug'
 verbose: info
diff --git a/Train/par/keras/srcnn.yaml b/Train/par/keras/srcnn.yaml
new file mode 100644
index 0000000..929d39f
--- /dev/null
+++ b/Train/par/keras/srcnn.yaml
@@ -0,0 +1,18 @@
+# srcnn 9-5-5
+---
+srcnn:
+  layers: 3
+  filters:
+    - 9
+    - 1
+    - 5
+  scale: 4
+  channel: 1
+
+batch: 4
+patch_size: 16
+lr: 1.0e-4
+lr_decay:
+  method: multistep
+  decay_step: [10000, 15000]
+  decay_rate: 0.1
diff --git a/VSR/Backend/Keras/Framework/Environment.py b/VSR/Backend/Keras/Framework/Environment.py
new file mode 100644
index 0000000..5ea5a3b
--- /dev/null
+++ b/VSR/Backend/Keras/Framework/Environment.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2017-2020 Wenyi Tang.
+# Author: Wenyi Tang
+# Email: wenyitang@outlook.com
+# Update: 2020 - 5 - 30
+
+import logging
+from pathlib import Path
+
+import numpy as np
+import tensorflow as tf
+
+LOG = logging.getLogger('VSR.Framework')
+
+
+def _parse_ckpt_name(name):
+  if not name:
+    return 0
+  model_name, epochs = Path(name).stem.split('-')
+  return int(epochs)
+
+
+class Env:
+  """Keras (TF v2) model runtime Env-ironment.
+
+  Args:
+    model: a Model object (note it's NOT tf.keras.Model), representing a
+      container of tf.keras.Model objects. See `..Models.Model` for details.
+    work_dir: a folder path, working directory of this environment.
+
+  Usage:
+    Use `with` syntax to enter the Env:
+
+    >>> with Env(...) as e: ...
+  """
+
+  def __init__(self, model, work_dir=None):
+    self._m = model
+    self._saved = None
+    self._logd = None
+    if work_dir is not None:
+      self._saved = Path(work_dir) / 'save'
+      self._logd = Path(work_dir) / 'log'
+    self._restored = False
+
+  def _startup(self):
+    if isinstance(self._saved, Path):
+      self._saved.mkdir(parents=True, exist_ok=True)
+      self.ckpt = tf.train.Checkpoint(**self.model.modules, **self.model.opts)
+      self.saver = tf.train.CheckpointManager(self.ckpt, self._saved, None,
+                                              checkpoint_name=self.model.name)
+    if isinstance(self._logd, Path):
+      self._logd.mkdir(parents=True, exist_ok=True)
+      if LOG.isEnabledFor(logging.DEBUG):
+        hdl = logging.FileHandler(self._logd / 'training.txt')
+        LOG.addHandler(hdl)
+
+  def _close(self):
+    """TODO anything to close?"""
+    pass
+
+  def __enter__(self):
+    """Enter the environment and display the model"""
+
+    self._startup()
+    self.model.display()
+    return self
+
+  def __exit__(self, exc_type, exc_val, exc_tb):
+    """Close the environment"""
+
+    self._close()
+
+  @property
+  def model(self):
+    return self._m
+
+  def _restore_model(self):
+    last_epoch = 0
+    ckpt = self.saver.latest_checkpoint
+    if ckpt:
+      self.ckpt.restore(ckpt)
+      try:
+        last_epoch = max(_parse_ckpt_name(str(ckpt)), last_epoch)
+      except ValueError:
+        last_epoch = 0
+    return last_epoch
+
+  def _save_model(self, step):
+    if not isinstance(self._saved, Path): return
+    self.saver.save(step)
+
+  def _restore(self, epoch=None):
+    # restore graph
+    if self._restored:
+      return self.last_epoch
+    self.last_epoch = self._restore_model()
+    self._restored = True
+    return self.last_epoch
+
+  def set_seed(self, seed):
+    """set a seed for RNG
+
+    Note: tensorflow and numpy use separate RNGs, so both are seeded.
+ """ + + np.random.seed(seed) + tf.random.set_seed(seed) + + def export(self, export_dir='.', version=1): + """export saved model. + + Args: + export_dir: path to saved_model dirs. + version: (optional) a child-folder to control output versions. + """ + + export_path = Path(export_dir) / str(version) + while export_path.exists(): + version += 1 # step ahead 1 version + export_path = Path(export_dir) / str(version) + export_path.mkdir(exist_ok=False, parents=True) + self.model.export(export_path) + LOG.info(f"Export saved model to {str(export_path)}") diff --git a/VSR/Backend/Keras/Framework/Trainer.py b/VSR/Backend/Keras/Framework/Trainer.py new file mode 100644 index 0000000..ad6542e --- /dev/null +++ b/VSR/Backend/Keras/Framework/Trainer.py @@ -0,0 +1,224 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 5 - 30 + +import logging +import time + +import numpy as np +import tensorflow as tf +import tqdm + +from VSR.Util.Config import Config +from .Environment import Env + +LOG = logging.getLogger('VSR.Framework') + + +def _ensemble_expand(feature): + r0 = feature + r1 = np.rot90(feature, 1, axes=[-3, -2]) + r2 = np.rot90(feature, 2, axes=[-3, -2]) + r3 = np.rot90(feature, 3, axes=[-3, -2]) + r4 = np.flip(feature, axis=-2) + r5 = np.rot90(r4, 1, axes=[-3, -2]) + r6 = np.rot90(r4, 2, axes=[-3, -2]) + r7 = np.rot90(r4, 3, axes=[-3, -2]) + return r0, r1, r2, r3, r4, r5, r6, r7 + + +def _ensemble_reduce_mean(outputs): + results = [] + for i in outputs: + outputs_ensemble = [ + i[0], + np.rot90(i[1], 3, axes=[-3, -2]), + np.rot90(i[2], 2, axes=[-3, -2]), + np.rot90(i[3], 1, axes=[-3, -2]), + np.flip(i[4], axis=-2), + np.flip(np.rot90(i[5], 3, axes=[-3, -2]), axis=-2), + np.flip(np.rot90(i[6], 2, axes=[-3, -2]), axis=-2), + np.flip(np.rot90(i[7], 1, axes=[-3, -2]), axis=-2), + ] + results.append(np.concatenate(outputs_ensemble).mean(axis=0, keepdims=True)) + return results + + +def to_tensor(x): + return x / 255.0 + + +def from_tensor(x): + return x * 255.0 + + +class SRTrainer(Env): + v = Config() + + def query_config(self, config, **kwargs): + config = Config(config or {}) + config.update(kwargs) + self.v.epochs = config.epochs or 1 # total epochs + self.v.batch_shape = config.batch_shape or [1, -1, -1, -1] + self.v.steps = config.steps or 200 + self.v.val_steps = config.val_steps or -1 + self.v.lr = config.lr or 1e-4 # learning rate + self.v.lr_schedule = config.lr_schedule + self.v.memory_limit = config.memory_limit + self.v.inference_results_hooks = config.inference_results_hooks or [] + self.v.validate_every_n_epoch = config.validate_every_n_epoch or 1 + self.v.traced_val = config.traced_val + self.v.ensemble = config.ensemble + self.v.cuda = config.cuda + return self.v + + def fit_init(self) -> bool: + v = self.v + v.epoch = self._restore() + if v.epoch >= v.epochs: + LOG.info(f'Found pre-trained epoch {v.epoch}>=target {v.epochs},' + ' quit fitting.') + return False + LOG.info('Fitting: {}'.format(self.model.name.upper())) + if self._logd: + v.writer = tf.summary.create_file_writer(str(self._logd), + name=self.model.name) + v.writer.set_as_default() + return True + + def fit_close(self): + # flush all pending summaries to disk + LOG.info(f'Training {self.model.name.upper()} finished.') + if self.v.writer is not None: + self.v.writer.close() + + def fit(self, loaders, config, **kwargs): + v = self.query_config(config, **kwargs) + v.train_loader, v.val_loader = loaders + if not self.fit_init(): + return + mem = v.memory_limit + 
for epoch in range(self.last_epoch + 1, v.epochs + 1): + v.epoch = epoch + train_iter = v.train_loader.make_one_shot_iterator(v.batch_shape, + v.steps, + shuffle=True, + memory_limit=mem) + v.train_loader.prefetch(shuffle=True, memory_usage=mem) + date = time.strftime('%Y-%m-%d %T', time.localtime()) + v.avg_meas = {} + if v.lr_schedule and callable(v.lr_schedule): + v.lr = v.lr_schedule(steps=v.epoch) + print('| {} | Epoch: {}/{} | LR: {:.2g} |'.format( + date, v.epoch, v.epochs, v.lr)) + with tqdm.tqdm(train_iter, unit='batch', ascii=True) as r: + self.model.to_train() + for items in r: + self.fn_train_each_step(items) + r.set_postfix(v.loss) + for _k, _v in v.avg_meas.items(): + _v = np.mean(_v) + tf.summary.scalar(_k, _v, step=v.epoch, description='train') + print('| Epoch average {} = {:.6f} |'.format(_k, _v)) + if v.epoch % v.validate_every_n_epoch == 0 and v.val_loader: + # Hard-coded memory limitation for validating + self.benchmark(v.val_loader, v, memory_limit='1GB') + self._save_model(v.epoch) + self.fit_close() + + def fn_train_each_step(self, pack): + v = self.v + feature = to_tensor(pack['lr']) + label = to_tensor(pack['hr']) + loss = self.model.train([feature], [label], v.lr) + for _k, _v in loss.items(): + v.avg_meas[_k] = \ + v.avg_meas[_k] + [_v] if v.avg_meas.get(_k) else [_v] + loss[_k] = '{:08.5f}'.format(_v) + v.loss = loss + + def benchmark(self, loader, config, **kwargs): + """Benchmark/validate the model. + + Args: + loader: a loader for enumerating LR images + config: benchmark configuration, an instance of `Util.Config.Config` + kwargs: additional arguments to override the same ones in config. + """ + v = self.query_config(config, **kwargs) + self._restore(config.epoch) + v.mean_metrics = {} + v.loader = loader + it = v.loader.make_one_shot_iterator(v.batch_shape, v.val_steps, + shuffle=not v.traced_val, + memory_limit=v.memory_limit) + self.model.to_eval() + for items in tqdm.tqdm(it, 'Test', ascii=True): + self.fn_benchmark_each_step(items) + for _k, _v in v.mean_metrics.items(): + _v = np.mean(_v) + tf.summary.scalar(_k, _v, step=v.epoch, description='eval') + print('{}: {:.6f}'.format(_k, _v), end=', ') + print('') + + def fn_benchmark_each_step(self, pack): + v = self.v + feature = to_tensor(pack['lr']) + label = to_tensor(pack['hr']) + outputs, metrics = self.model.eval([feature], [label], epoch=v.epoch) + for _k, _v in metrics.items(): + if _k not in v.mean_metrics: + v.mean_metrics[_k] = [] + v.mean_metrics[_k] += [_v] + outputs = [from_tensor(x) for x in outputs] + for fn in v.inference_results_hooks: + outputs = fn(outputs, names=pack['name']) + if outputs is None: + break + + def infer(self, loader, config, **kwargs): + """Infer SR images. + + Args: + loader: a loader for enumerating LR images + config: inferring configuration, an instance of `Util.Config.Config` + kwargs: additional arguments to override the same ones in config. 
+ """ + v = self.query_config(config, **kwargs) + self._restore(config.epoch) + it = loader.make_one_shot_iterator([1, -1, -1, -1], -1) + if hasattr(it, '__len__'): + if len(it): + LOG.info('Inferring {} at epoch {}'.format( + self.model.name, self.last_epoch)) + else: + return + # use original images in inferring + self.model.to_eval() + for items in tqdm.tqdm(it, 'Infer', ascii=True): + self.fn_infer_each_step(items) + + def fn_infer_each_step(self, pack): + v = self.v + if v.ensemble: + # add self-ensemble boosting metric score + feature_ensemble = _ensemble_expand(pack['lr']) + outputs_ensemble = [] + for f in feature_ensemble: + f = to_tensor(f) + y, _ = self.model.eval([f]) + y = [from_tensor(x) for x in y] + outputs_ensemble.append(y) + outputs = [] + for i in range(len(outputs_ensemble[0])): + outputs.append([j[i] for j in outputs_ensemble]) + outputs = _ensemble_reduce_mean(outputs) + else: + feature = to_tensor(pack['lr']) + outputs, _ = self.model.eval([feature]) + outputs = [from_tensor(x) for x in outputs] + for fn in v.inference_results_hooks: + outputs = fn(outputs, names=pack['name']) + if outputs is None: + break diff --git a/VSR/Backend/Keras/Framework/__init__.py b/VSR/Backend/Keras/Framework/__init__.py new file mode 100644 index 0000000..69a01b3 --- /dev/null +++ b/VSR/Backend/Keras/Framework/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 5 - 30 + diff --git a/VSR/Backend/Keras/Models/Model.py b/VSR/Backend/Keras/Models/Model.py new file mode 100644 index 0000000..f698113 --- /dev/null +++ b/VSR/Backend/Keras/Models/Model.py @@ -0,0 +1,135 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 5 - 30 + +import logging + +import tensorflow as tf + +from .. import LOG +from ..Framework.Trainer import SRTrainer + + +class BasicModel: + """Trainable model wrapper for keras.Model objects + + There are 2 built-in attributes: + - modules: contains a K-V pair of `str: Model`. It will be automatically + appended if a derived object assign any attribute with `Model` object. + - opts: contains a K-V pair of `str: Optimizer`. Will be automatically + appended if a derived object assign any attribute with `Optimizer`. + """ + + def __init__(self, **kwargs): + self.modules = {} + self.opts = {} + self.name = kwargs.get('name', 'model') + self._trainer = None + + def __setattr__(self, key, value): + if key in ('modules', 'opts',): + if hasattr(self, key): + raise ValueError(f"Can't overwrite built-in '{key}' of BasicModel") + if isinstance(value, tf.keras.Model): + if key in self.modules: + if self.modules[key] is value: + return + else: + # TODO: why assign twice?? + raise NotImplementedError + else: + self.modules[key] = value + if isinstance(value, tf.keras.optimizers.Optimizer): + if key in self.opts: + if self.opts[key] is value: + return + else: + raise NotImplementedError + else: + self.opts[key] = value + + return super(BasicModel, self).__setattr__(key, value) + + def trainable_variables(self, name=None): + """Return variables who require gradients. + + Args: + name: module name. Will return all trainable variables if no name given. + """ + + _m = [self.modules.get(name)] if name else self.modules.values() + _var = [] + for i in _m: + _var += i.trainable_variables + return _var + + def to_train(self): + """Change modules to train mode.""" + pass + + def train(self, *args, **kwargs): + """Forward and backward data path. 
+ The trainer drives the data pipeline through this callback.""" + raise NotImplementedError + + def to_eval(self): + """Change modules to evaluate mode.""" + pass + + def eval(self, *args, **kwargs): + """Forward data path. No backward pass is needed, since this is only for testing.""" + raise NotImplementedError + + def display(self): + """Show model info.""" + num_params = 0 + for m in self.modules.values(): + for p in m.variables: + num_params += p.get_shape().num_elements() + LOG.info(f"Total params: {num_params}") + if LOG.isEnabledFor(logging.DEBUG): + [v.summary() for v in self.modules.values()] + + def cuda(self): + """Move model to cuda device.""" + pass + + def export(self, export_dir): + """Export keras model. + + Args: + export_dir: path to save pb files. + """ + + raise NotImplementedError("Should implement in specific model!") + + @property + def executor(self): + """Return the trainer bound to this model.""" + return self.get_executor(None) + + def get_executor(self, root): + if issubclass(self._trainer.__class__, type): + self._trainer = self._trainer(self, root) + return self._trainer + else: + return self._trainer + + def load(self, ckpt): + for key, model in self.modules.items(): + if not isinstance(ckpt, dict): + model.load_weights(str(ckpt)) + break + model.load_weights(str(ckpt[key])) + + +class SuperResolution(BasicModel): + """A default model for (video) super-resolution""" + + def __init__(self, scale, channel, **kwargs): + super(SuperResolution, self).__init__(**kwargs) + self.scale = scale + self.channel = channel + # Default SR trainer + self._trainer = SRTrainer diff --git a/VSR/Backend/Keras/Models/Srcnn.py b/VSR/Backend/Keras/Models/Srcnn.py new file mode 100644 index 0000000..b7a249a --- /dev/null +++ b/VSR/Backend/Keras/Models/Srcnn.py @@ -0,0 +1,65 @@ +# Copyright (c) 2017-2020 Wenyi Tang.
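Because `__setattr__` above intercepts every attribute assignment, a derived model never has to register its sub-networks or optimizers by hand. A minimal sketch (hypothetical `Toy` class, not part of the patch) of that behavior:

import tensorflow as tf

class Toy(BasicModel):
    def __init__(self):
        super(Toy, self).__init__(name='toy')
        # plain attribute assignments; both objects are auto-registered
        self.body = tf.keras.Sequential([tf.keras.layers.Dense(4)])
        self.opt = tf.keras.optimizers.Adam(1e-4)

m = Toy()
assert 'body' in m.modules and 'opt' in m.opts
# trainable_variables() now walks m.modules automatically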
+# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 5 - 30 + +import tensorflow as tf + +from .Model import SuperResolution + + +class Srcnn(tf.keras.Model): + def __init__(self, channel, filters): + super(Srcnn, self).__init__() + self.net = [ + tf.keras.layers.Conv2D(64, filters[0], padding='same', + activation=tf.nn.relu), + tf.keras.layers.Conv2D(32, filters[1], padding='same', + activation=tf.nn.relu), + tf.keras.layers.Conv2D(channel, filters[2], padding='same')] + + def call(self, inputs): + x = inputs + for layer in self.net: + x = layer(x) + return x + + +class SRCNN(SuperResolution): + def __init__(self, channel, scale, **kwargs): + super(SRCNN, self).__init__(scale=scale, channel=channel, name='srcnn') + self.net = Srcnn(channel, kwargs.get('filters', (9, 1, 5))) + self.net(tf.keras.Input([None, None, channel])) + self.opt = tf.keras.optimizers.Adam(learning_rate=1e-4) + + def train(self, inputs, labels, learning_rate=None): + lr_image = inputs[0] + _, H, W, _ = lr_image.shape + bi_image = tf.image.resize(lr_image, [H * self.scale, W * self.scale], + tf.image.ResizeMethod.BICUBIC) + with tf.GradientTape() as tape: + sr = self.net(bi_image) + pixel_loss = tf.reduce_mean(tf.losses.mean_squared_error(labels[0], sr)) + variables = self.trainable_variables() + grads = tape.gradient(pixel_loss, variables) + if learning_rate: + self.opt.learning_rate = learning_rate + self.opt.apply_gradients(zip(grads, variables)) + return { + 'loss': pixel_loss.numpy() + } + + def eval(self, inputs, labels=None, **kwargs): + metrics = {} + lr_image = inputs[0] + _, H, W, _ = lr_image.shape + bi_image = tf.image.resize(lr_image, [H * self.scale, W * self.scale], + tf.image.ResizeMethod.BICUBIC) + sr = self.net(bi_image) + if labels is not None: + metrics['psnr'] = tf.image.psnr(sr, labels[0], 1.0) + step = kwargs.get('epoch') + tf.summary.image('sr', sr, step=step, max_outputs=1) + tf.summary.image('bicubic', bi_image, step=step, max_outputs=1) + tf.summary.image('gt', labels[0], step=step, max_outputs=1) + return [sr.numpy()], metrics diff --git a/VSR/Backend/Keras/Models/__init__.py b/VSR/Backend/Keras/Models/__init__.py new file mode 100644 index 0000000..f038db1 --- /dev/null +++ b/VSR/Backend/Keras/Models/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 5 - 30 + +import importlib + +__all__ = ['get_model', 'list_supported_models'] + +models = { + # alias: (file, class) + 'srcnn': ('Srcnn', 'SRCNN'), +} + + +def get_model(name): + module = f'.Backend.Keras.Models.{models[name][0]}' + package = 'VSR' + m = importlib.import_module(module, package) + return m.__dict__[models[name][1]] + + +def list_supported_models(): + return models.keys() diff --git a/VSR/Backend/Keras/__init__.py b/VSR/Backend/Keras/__init__.py new file mode 100644 index 0000000..f09827e --- /dev/null +++ b/VSR/Backend/Keras/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 5 - 30 + +from .. 
import LOG + +import tensorflow as tf + +ver_major, ver_minor, _ = [int(s) for s in tf.__version__.split('.')] +if ver_major < 2: + LOG.warning("Legacy tensorflow 1.x is not verified in the keras backend") + +gpus = tf.config.experimental.list_physical_devices('GPU') +if gpus: + # Enable memory growth on every visible GPU + try: + # Currently, memory growth needs to be the same across GPUs + for gpu in gpus: + tf.config.experimental.set_memory_growth(gpu, True) + except RuntimeError as e: + # Visible devices must be set before GPUs have been initialized + print(e) diff --git a/VSR/Backend/TF/Framework/LayersHelper.py b/VSR/Backend/TF/Framework/LayersHelper.py index 26a5f85..9c1e7fe 100644 --- a/VSR/Backend/TF/Framework/LayersHelper.py +++ b/VSR/Backend/TF/Framework/LayersHelper.py @@ -8,7 +8,7 @@ """ from VSR.Util import to_list -from .. import tf, tfc +from .. import tf from ..Util import ( SpectralNorm, TorchInitializer, pixel_shift, pop_dict_wo_keyerror, prelu ) @@ -25,6 +25,7 @@ def batch_norm(self, x, training, decay=0.9, epsilon=1e-5, name=None): name=name) def instance_norm(self, x, trainable=True, name=None, reuse=None): + from .. import tfc with tf.variable_scope(name, 'InstanceNorm', reuse=reuse): return tfc.layers.instance_norm( x, @@ -32,6 +33,7 @@ def instance_norm(self, x, trainable=True, name=None, reuse=None): variables_collections=[tf.GraphKeys.GLOBAL_VARIABLES]) def layer_norm(self, x, trainable=True, name=None, reuse=None): + from .. import tfc with tf.variable_scope(name, 'LayerNorm', reuse=reuse): return tfc.layers.layer_norm( x, @@ -39,6 +41,7 @@ def layer_norm(self, x, trainable=True, name=None, reuse=None): variables_collections=[tf.GraphKeys.GLOBAL_VARIABLES]) def group_norm(self, x, group, axis, trainable=True, name=None, reuse=None): + from .. import tfc with tf.variable_scope(name, 'GroupNorm', reuse=reuse): return tfc.layers.group_norm( x, group, axis, diff --git a/VSR/Backend/__init__.py b/VSR/Backend/__init__.py index efbfc46..1cfab7a 100644 --- a/VSR/Backend/__init__.py +++ b/VSR/Backend/__init__.py @@ -36,17 +36,23 @@ BACKEND = CONFIG['backend'].lower() if BACKEND == 'auto': BACKEND = 'tensorflow' -if BACKEND not in ('tensorflow', 'tensorflow2', 'pytorch'): +if BACKEND not in ('tensorflow', 'keras', 'pytorch'): BACKEND = 'pytorch' -if BACKEND in ('tensorflow', 'tensorflow2'): +if BACKEND in ('tensorflow', 'keras'): try: tf = import_module('tensorflow') CONFIG['data_format'] = 'channels_last' - if BACKEND == 'tensorflow2' and tf.__version__.split('.')[0] != '2': + tf_ver_major, tf_ver_minor, _ = [int(s) for s in tf.__version__.split('.')] + if BACKEND == 'keras' and tf_ver_major < 2: LOG.warning(f"[!] Current tensorflow version is {tf.__version__}") - LOG.info("[*] Fallback to use tensorflow") + LOG.info("[*] Fallback to use legacy tensorflow v1.x") BACKEND = 'tensorflow' + if tf_ver_major == 1 and tf_ver_minor < 15: + LOG.warning("[!!] VSR does not support TF < 1.15.0 any longer.") + LOG.warning("[!] Consider using an old version of VSR, " + "or update your tensorflow version.") + raise ImportError except ImportError: LOG.warning("[!]
Tensorflow package not found in your system.") LOG.info("[*] Fallback to use PyTorch...") diff --git a/VSR/Model/__init__.py b/VSR/Model/__init__.py index f713cd4..610b3b7 100644 --- a/VSR/Model/__init__.py +++ b/VSR/Model/__init__.py @@ -20,8 +20,8 @@ def get_model(name: str): return import_module('.Models', 'VSR.Backend.Torch').get_model(name) elif BACKEND == 'tensorflow': return import_module('.Models', 'VSR.Backend.TF').get_model(name) - elif BACKEND == 'tensorflow2': - pass + elif BACKEND == 'keras': + return import_module('.Models', 'VSR.Backend.Keras').get_model(name) except (KeyError, ImportError): raise ImportError(f"Using {BACKEND}, can't find model {name}.") @@ -31,5 +31,5 @@ def list_supported_models(): return import_module('.Models', 'VSR.Backend.Torch').list_supported_models() elif BACKEND == 'tensorflow': return import_module('.Models', 'VSR.Backend.TF').list_supported_models() - elif BACKEND == 'tensorflow2': - pass + elif BACKEND == 'keras': + return import_module('.Models', 'VSR.Backend.Keras').list_supported_models() diff --git a/prepare_data.py b/prepare_data.py index f5d7b7c..a8eeab9 100644 --- a/prepare_data.py +++ b/prepare_data.py @@ -28,7 +28,7 @@ exit(-1) _DEFAULT_DATASET_PATH = '/mnt/data/datasets' -_DEFAULT_DOWNLOAD_DIR = '/tmp/downloads' +_DEFAULT_DOWNLOAD_DIR = '.vsr/downloads' _DEFAULT_WEIGHTS_DIR = './Results' # Contact me if any of these links un-accessed DATASETS = { @@ -53,7 +53,6 @@ } WEIGHTS = { 'srcnn.tar.gz': 'https://github.com/LoSealL/Model/releases/download/srcnn/srcnn.tar.gz', - 'espcn.tar.gz': 'https://github.com/LoSealL/Model/releases/download/espcn/espcn.tar.gz', 'edsr.zip': 'https://github.com/LoSealL/Model/releases/download/edsr/edsr.zip', 'dncnn.zip': 'https://github.com/LoSealL/Model/releases/download/DnCNN/dncnn.zip', 'carn.zip': 'https://github.com/LoSealL/Model/releases/download/carn/carn.zip', From 931d5a41dcbaf2a5dd75fbab9ca51d70ef639d0c Mon Sep 17 00:00:00 2001 From: Wenyi Tang Date: Tue, 2 Jun 2020 09:53:55 +0800 Subject: [PATCH 03/12] Call train tools from an arbitrary workspace --- Train/check_dataset.py | 4 +++- Train/eval.py | 7 ++++--- Train/train.py | 7 ++++--- VSR/Backend/Torch/Models/Srmd.py | 4 ++-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Train/check_dataset.py b/Train/check_dataset.py index e27714d..db67700 100644 --- a/Train/check_dataset.py +++ b/Train/check_dataset.py @@ -4,6 +4,7 @@ # Update: 2020 - 4 - 17 import argparse +from pathlib import Path from VSR.DataLoader import load_datasets @@ -48,11 +49,12 @@ def _check(name: str): if __name__ == '__main__': + CWD = Path(__file__).resolve().parent.parent parser = argparse.ArgumentParser( description="Check the dataset and print out its content") parser.add_argument("dataset", type=str, help="The name of the dataset, case insensitive.") - parser.add_argument("--description-file", default="../Data/datasets.yaml", + parser.add_argument("--description-file", default=f"{CWD}/Data/datasets.yaml", help="DDF file") flags = parser.parse_args() main(flags.dataset, flags.description_file) diff --git a/Train/eval.py b/Train/eval.py index df65aab..8904d6b 100644 --- a/Train/eval.py +++ b/Train/eval.py @@ -13,13 +13,14 @@ Config, compat_param, save_inference_images, suppress_opt_by_args ) +CWD = Path(__file__).resolve().parent.parent parser = argparse.ArgumentParser(description=f'VSR ({BACKEND}) Testing Tool v1.0') g0 = parser.add_argument_group("basic options") g0.add_argument("model", choices=list_supported_models(), help="specify the model name") 
g0.add_argument("-p", "--parameter", help="specify the model parameter file (*.yaml)") g0.add_argument("-t", "--test", nargs='*', help="specify test dataset name or data path") -g0.add_argument("--save_dir", default='../Results', help="working directory") -g0.add_argument("--data_config", default="../Data/datasets.yaml", help="specify dataset config file") +g0.add_argument("--save_dir", default=f'{CWD}/Results', help="working directory") +g0.add_argument("--data_config", default=f"{CWD}/Data/datasets.yaml", help="specify dataset config file") g1 = parser.add_argument_group("evaluating options") g1.add_argument("--pretrain", help="specify the pre-trained model checkpoint or will search into `save_dir` if not specified") g1.add_argument("--ensemble", action="store_true") @@ -66,7 +67,7 @@ def main(): if opt.parameter: model_config_file = Path(opt.parameter) else: - model_config_file = Path(f'par/{BACKEND}/{opt.model}.{_ext}') + model_config_file = Path(f'{CWD}/Train/par/{BACKEND}/{opt.model}.{_ext}') if model_config_file.exists(): opt.update(compat_param(Config(str(model_config_file)))) # get model parameters from pre-defined YAML file diff --git a/Train/train.py b/Train/train.py index 6689d83..8cb9957 100644 --- a/Train/train.py +++ b/Train/train.py @@ -12,12 +12,13 @@ from VSR.Model import get_model, list_supported_models from VSR.Util import Config, lr_decay, suppress_opt_by_args, compat_param +CWD = Path(__file__).resolve().parent.parent parser = argparse.ArgumentParser(description=f'VSR ({BACKEND}) Training Tool v1.0') g0 = parser.add_argument_group("basic options") g0.add_argument("model", choices=list_supported_models(), help="specify the model name") g0.add_argument("-p", "--parameter", help="specify the model parameter file (*.yaml)") -g0.add_argument("--save_dir", default='../Results', help="working directory") -g0.add_argument("--data_config", default="../Data/datasets.yaml", help="specify dataset config file") +g0.add_argument("--save_dir", default=f'{CWD}/Results', help="working directory") +g0.add_argument("--data_config", default=f"{CWD}/Data/datasets.yaml", help="specify dataset config file") g1 = parser.add_argument_group("training options") g1.add_argument("--dataset", default='none', help="specify a dataset alias for training") g1.add_argument("--epochs", type=int, default=1, help="specify total epochs to train") @@ -48,7 +49,7 @@ def main(): if opt.parameter: model_config_file = Path(opt.parameter) else: - model_config_file = Path(f'par/{BACKEND}/{opt.model}.{_ext}') + model_config_file = Path(f'{CWD}/Train/par/{BACKEND}/{opt.model}.{_ext}') if model_config_file.exists(): opt.update(compat_param(Config(str(model_config_file)))) # get model parameters from pre-defined YAML file diff --git a/VSR/Backend/Torch/Models/Srmd.py b/VSR/Backend/Torch/Models/Srmd.py index 011d29c..9deb0b3 100644 --- a/VSR/Backend/Torch/Models/Srmd.py +++ b/VSR/Backend/Torch/Models/Srmd.py @@ -45,12 +45,12 @@ def gen_kernel(self, ktype, ksize, l1, l2=None, theta=0): def gen_random_kernel(self): theta = np.random.uniform(0, np.pi) - l1 = np.random.uniform(0.1, 10) + l1 = np.random.uniform(0.1, self.l1) l2 = np.random.uniform(0.1, l1) return self.gen_kernel('anisotropic', self.kernel_size, l1, l2, theta) def gen_random_noise(self, shape): - stddev = np.random.uniform(0, 75 / 255, size=[shape[0]]) + stddev = np.random.uniform(0, self.noise, size=[shape[0]]) noise = np.random.normal(size=shape) * stddev return noise, stddev From 7bec6044a406a19d7eda48180fd150ce1c7730ee Mon Sep 17 00:00:00 2001 From: 
Wenyi Tang Date: Tue, 16 Jun 2020 18:54:54 +0800 Subject: [PATCH 04/12] Refactor pytorch model designs - Remove the subfolder of each individual network, for better code structure and reuse; - Reuse existing architectures and blocks as much as possible, which relies on a `sequential_load` API; - Add a new API to the `Model` object, "sequential_load", which loads and modifies variables in the defined order and ignores key mismatches. --- Tests/space_to_depth_test.py | 2 +- Train/par/pytorch/dbpn.yml | 3 +- Train/par/pytorch/edsr.yml | 4 +- Train/par/pytorch/esrgan.yml | 7 +- Train/par/pytorch/msrn.yml | 2 +- Train/par/pytorch/rcan.yml | 3 +- Train/par/pytorch/realsr.yml | 10 + Train/par/pytorch/srmd.yml | 10 +- VSR/Backend/Torch/Framework/Environment.py | 3 +- VSR/Backend/Torch/Models/Arch.py | 393 ---------------- VSR/Backend/Torch/Models/Carn.py | 134 +++++- VSR/Backend/Torch/Models/Classic.py | 148 +----- VSR/Backend/Torch/Models/Contrib/__init__.py | 5 + .../Models/{ => Contrib}/ntire19/__init__.py | 0 .../Models/{ => Contrib}/ntire19/denoise.py | 3 +- .../Models/{ => Contrib}/ntire19/edrn.py | 0 .../Torch/Models/{ => Contrib}/ntire19/frn.py | 2 +- .../Models/{ => Contrib}/ntire19/ran2.py | 0 .../Torch/Models/Contrib/ntire20/__init__.py | 9 + .../Contrib/ntire20/xiaozhong/__init__.py | 9 + .../Contrib/ntire20/xiaozhong/ops/__init__.py | 66 +++ .../ntire20/xiaozhong/ops/discriminator.py | 279 +++++++++++ .../Contrib/ntire20/xiaozhong/ops/loss.py | 79 ++++ .../Contrib/ntire20/xiaozhong/ops/network.py | 92 ++++ VSR/Backend/Torch/Models/Crdn.py | 22 +- VSR/Backend/Torch/Models/Dbpn.py | 152 +++++- VSR/Backend/Torch/Models/Drn.py | 7 +- VSR/Backend/Torch/Models/Edsr.py | 115 ++++- VSR/Backend/Torch/Models/Esrgan.py | 47 +- VSR/Backend/Torch/Models/Ffdnet.py | 3 +- VSR/Backend/Torch/Models/Frvsr.py | 33 +- VSR/Backend/Torch/Models/Mldn.py | 3 +- VSR/Backend/Torch/Models/Model.py | 49 +- VSR/Backend/Torch/Models/Msrn.py | 85 +++- VSR/Backend/Torch/Models/NTIRE19.py | 62 +-- VSR/Backend/Torch/Models/NTIRE20.py | 54 +++ VSR/Backend/Torch/Models/Ops/Blocks.py | 388 ++++++++++++++++ .../Torch/Models/{ => Ops}/Discriminator.py | 9 +- VSR/Backend/Torch/Models/Ops/Distortion.py | 140 ++++++ VSR/Backend/Torch/Models/Ops/Initializer.py | 5 + VSR/Backend/Torch/Models/{ => Ops}/Loss.py | 42 +- VSR/Backend/Torch/Models/Ops/Motion.py | 185 ++++++++ VSR/Backend/Torch/Models/Ops/Scale.py | 160 +++++++ VSR/Backend/Torch/Models/Ops/__init__.py | 5 + VSR/Backend/Torch/Models/Optim/SISR.py | 210 +++++++++ VSR/Backend/Torch/Models/Qprn.py | 47 +- VSR/Backend/Torch/Models/Rbpn.py | 104 ++++- VSR/Backend/Torch/Models/Rcan.py | 69 ++- VSR/Backend/Torch/Models/SRFeat.py | 44 +- VSR/Backend/Torch/Models/Sofvsr.py | 198 +++++++- VSR/Backend/Torch/Models/Spmc.py | 120 ++++- VSR/Backend/Torch/Models/Srmd.py | 135 +++--- VSR/Backend/Torch/Models/TecoGAN.py | 76 ++- VSR/Backend/Torch/Models/Vespcn.py | 84 +++- VSR/Backend/Torch/Models/carn/__init__.py | 9 - VSR/Backend/Torch/Models/carn/carn.py | 85 ---- VSR/Backend/Torch/Models/carn/carn_m.py | 83 ---- VSR/Backend/Torch/Models/carn/ops.py | 143 ------ VSR/Backend/Torch/Models/dbpn/__init__.py | 9 - .../Torch/Models/dbpn/base_networks.py | 438 ------------------ VSR/Backend/Torch/Models/dbpn/dbpn.py | 98 ---- VSR/Backend/Torch/Models/dbpn/dbpn_v1.py | 122 ----- VSR/Backend/Torch/Models/dbpn/dbpns.py | 56 --- VSR/Backend/Torch/Models/edsr/__init__.py | 9 - VSR/Backend/Torch/Models/edsr/common.py | 90 ---- VSR/Backend/Torch/Models/edsr/edsr.py | 83 ----
VSR/Backend/Torch/Models/edsr/mdsr.py | 67 --- VSR/Backend/Torch/Models/esrgan/__init__.py | 9 - .../Torch/Models/esrgan/architecture.py | 48 -- VSR/Backend/Torch/Models/esrgan/block.py | 286 ------------ VSR/Backend/Torch/Models/frvsr/__init__.py | 4 - VSR/Backend/Torch/Models/frvsr/ops.py | 72 --- VSR/Backend/Torch/Models/msrn/__init__.py | 9 - VSR/Backend/Torch/Models/msrn/msrn.py | 118 ----- VSR/Backend/Torch/Models/rbpn/__init__.py | 11 - VSR/Backend/Torch/Models/rbpn/base_network.py | 413 ----------------- VSR/Backend/Torch/Models/rbpn/ops.py | 157 ------- VSR/Backend/Torch/Models/rcan/__init__.py | 9 - VSR/Backend/Torch/Models/rcan/common.py | 78 ---- VSR/Backend/Torch/Models/rcan/rcan.py | 151 ------ VSR/Backend/Torch/Models/sof/__init__.py | 9 - VSR/Backend/Torch/Models/sof/modules.py | 201 -------- VSR/Backend/Torch/Models/spmc/__init__.py | 11 - VSR/Backend/Torch/Models/spmc/ops.py | 225 --------- VSR/Backend/Torch/Models/srfeat/__init__.py | 10 - VSR/Backend/Torch/Models/srfeat/ops.py | 35 -- VSR/Backend/Torch/Models/srmd/__init__.py | 10 - VSR/Backend/Torch/Models/srmd/ops.py | 37 -- VSR/Backend/Torch/Models/teco/__init__.py | 11 - VSR/Backend/Torch/Models/teco/ops.py | 64 --- VSR/Backend/Torch/Models/vespcn/__init__.py | 10 - VSR/Backend/Torch/Models/vespcn/ops.py | 106 ----- VSR/Backend/Torch/Models/video/__init__.py | 8 - VSR/Backend/Torch/Models/video/motion.py | 93 ---- VSR/Backend/Torch/Util/Distortion.py | 140 ++++++ VSR/Backend/Torch/Util/Utility.py | 30 +- VSR/Backend/Torch/__init__.py | 0 VSR/Util/Math.py | 13 + .../srmd/pca.py => Util/PcaPrecompute.py} | 17 +- prepare_data.py | 13 - 100 files changed, 3258 insertions(+), 4358 deletions(-) create mode 100644 Train/par/pytorch/realsr.yml delete mode 100644 VSR/Backend/Torch/Models/Arch.py create mode 100644 VSR/Backend/Torch/Models/Contrib/__init__.py rename VSR/Backend/Torch/Models/{ => Contrib}/ntire19/__init__.py (100%) rename VSR/Backend/Torch/Models/{ => Contrib}/ntire19/denoise.py (98%) rename VSR/Backend/Torch/Models/{ => Contrib}/ntire19/edrn.py (100%) rename VSR/Backend/Torch/Models/{ => Contrib}/ntire19/frn.py (99%) rename VSR/Backend/Torch/Models/{ => Contrib}/ntire19/ran2.py (100%) create mode 100644 VSR/Backend/Torch/Models/Contrib/ntire20/__init__.py create mode 100644 VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/__init__.py create mode 100644 VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/__init__.py create mode 100644 VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/discriminator.py create mode 100644 VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/loss.py create mode 100644 VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/network.py create mode 100644 VSR/Backend/Torch/Models/Ops/Blocks.py rename VSR/Backend/Torch/Models/{ => Ops}/Discriminator.py (96%) create mode 100644 VSR/Backend/Torch/Models/Ops/Distortion.py create mode 100644 VSR/Backend/Torch/Models/Ops/Initializer.py rename VSR/Backend/Torch/Models/{ => Ops}/Loss.py (78%) create mode 100644 VSR/Backend/Torch/Models/Ops/Motion.py create mode 100644 VSR/Backend/Torch/Models/Ops/Scale.py create mode 100644 VSR/Backend/Torch/Models/Ops/__init__.py create mode 100644 VSR/Backend/Torch/Models/Optim/SISR.py delete mode 100644 VSR/Backend/Torch/Models/carn/__init__.py delete mode 100644 VSR/Backend/Torch/Models/carn/carn.py delete mode 100644 VSR/Backend/Torch/Models/carn/carn_m.py delete mode 100644 VSR/Backend/Torch/Models/carn/ops.py delete mode 100644 VSR/Backend/Torch/Models/dbpn/__init__.py delete mode 100644 
VSR/Backend/Torch/Models/dbpn/base_networks.py delete mode 100644 VSR/Backend/Torch/Models/dbpn/dbpn.py delete mode 100644 VSR/Backend/Torch/Models/dbpn/dbpn_v1.py delete mode 100644 VSR/Backend/Torch/Models/dbpn/dbpns.py delete mode 100644 VSR/Backend/Torch/Models/edsr/__init__.py delete mode 100644 VSR/Backend/Torch/Models/edsr/common.py delete mode 100644 VSR/Backend/Torch/Models/edsr/edsr.py delete mode 100644 VSR/Backend/Torch/Models/edsr/mdsr.py delete mode 100644 VSR/Backend/Torch/Models/esrgan/__init__.py delete mode 100644 VSR/Backend/Torch/Models/esrgan/architecture.py delete mode 100644 VSR/Backend/Torch/Models/esrgan/block.py delete mode 100644 VSR/Backend/Torch/Models/frvsr/__init__.py delete mode 100644 VSR/Backend/Torch/Models/frvsr/ops.py delete mode 100644 VSR/Backend/Torch/Models/msrn/__init__.py delete mode 100644 VSR/Backend/Torch/Models/msrn/msrn.py delete mode 100644 VSR/Backend/Torch/Models/rbpn/__init__.py delete mode 100644 VSR/Backend/Torch/Models/rbpn/base_network.py delete mode 100644 VSR/Backend/Torch/Models/rbpn/ops.py delete mode 100644 VSR/Backend/Torch/Models/rcan/__init__.py delete mode 100644 VSR/Backend/Torch/Models/rcan/common.py delete mode 100644 VSR/Backend/Torch/Models/rcan/rcan.py delete mode 100644 VSR/Backend/Torch/Models/sof/__init__.py delete mode 100644 VSR/Backend/Torch/Models/sof/modules.py delete mode 100644 VSR/Backend/Torch/Models/spmc/__init__.py delete mode 100644 VSR/Backend/Torch/Models/spmc/ops.py delete mode 100644 VSR/Backend/Torch/Models/srfeat/__init__.py delete mode 100644 VSR/Backend/Torch/Models/srfeat/ops.py delete mode 100644 VSR/Backend/Torch/Models/srmd/__init__.py delete mode 100644 VSR/Backend/Torch/Models/srmd/ops.py delete mode 100644 VSR/Backend/Torch/Models/teco/__init__.py delete mode 100644 VSR/Backend/Torch/Models/teco/ops.py delete mode 100644 VSR/Backend/Torch/Models/vespcn/__init__.py delete mode 100644 VSR/Backend/Torch/Models/vespcn/ops.py delete mode 100644 VSR/Backend/Torch/Models/video/__init__.py delete mode 100644 VSR/Backend/Torch/Models/video/motion.py create mode 100644 VSR/Backend/Torch/Util/Distortion.py create mode 100644 VSR/Backend/Torch/__init__.py rename VSR/{Backend/Torch/Models/srmd/pca.py => Util/PcaPrecompute.py} (99%) diff --git a/Tests/space_to_depth_test.py b/Tests/space_to_depth_test.py index a8a558e..7d9ab3e 100644 --- a/Tests/space_to_depth_test.py +++ b/Tests/space_to_depth_test.py @@ -13,7 +13,7 @@ import torch import torchvision from torch.nn import PixelShuffle - from VSR.Backend.Torch.Models.Arch import SpaceToDim + from VSR.Backend.Torch.Models.Ops.Scale import SpaceToDim except ImportError: exit(0) diff --git a/Train/par/pytorch/dbpn.yml b/Train/par/pytorch/dbpn.yml index ee29eb5..f786457 100644 --- a/Train/par/pytorch/dbpn.yml +++ b/Train/par/pytorch/dbpn.yml @@ -1,7 +1,6 @@ dbpn: scale: 4 - mode: 'dbpn' - num_channels: 3 + channel: 3 base_filter: 64 feat: 256 num_stages: 7 diff --git a/Train/par/pytorch/edsr.yml b/Train/par/pytorch/edsr.yml index 5d771b0..a52a5c6 100644 --- a/Train/par/pytorch/edsr.yml +++ b/Train/par/pytorch/edsr.yml @@ -1,10 +1,8 @@ edsr: scale: 4 + channel: 3 n_resblocks: 16 n_feats: 64 - rgb_range: 255 - res_scale: 1 - n_colors: 3 batch_shape: [8, 3, 48, 48] lr: 1.0e-4 diff --git a/Train/par/pytorch/esrgan.yml b/Train/par/pytorch/esrgan.yml index 51226ba..d8af6ab 100644 --- a/Train/par/pytorch/esrgan.yml +++ b/Train/par/pytorch/esrgan.yml @@ -1,14 +1,9 @@ esrgan: scale: 4 - in_nc: 3 - out_nc: 3 + channel: 3 nf: 64 nb: 23 gc: 32 - act_type: 'leakyrelu' - 
mode: 'CNA' - res_scale: 1 - upsample_mode: 'upconv' weights: [0.01, 1, 5.0e-3] patch_size: 128 diff --git a/Train/par/pytorch/msrn.yml b/Train/par/pytorch/msrn.yml index 4de2fe3..275606d 100644 --- a/Train/par/pytorch/msrn.yml +++ b/Train/par/pytorch/msrn.yml @@ -1,7 +1,7 @@ msrn: scale: 4 + channel: 3 rgb_range: 255 - n_colors: 3 batch_shape: [16, 3, 32, 32] lr: 1.0e-4 diff --git a/Train/par/pytorch/rcan.yml b/Train/par/pytorch/rcan.yml index fb27593..06e6434 100644 --- a/Train/par/pytorch/rcan.yml +++ b/Train/par/pytorch/rcan.yml @@ -1,12 +1,11 @@ rcan: scale: 4 + channel: 3 n_resgroups: 10 n_resblocks: 20 n_feats: 64 reduction: 16 rgb_range: 255 - n_colors: 3 - res_scale: 1.0 batch_shape: [16, 3, 32, 32] lr: 1.0e-4 diff --git a/Train/par/pytorch/realsr.yml b/Train/par/pytorch/realsr.yml new file mode 100644 index 0000000..0be0476 --- /dev/null +++ b/Train/par/pytorch/realsr.yml @@ -0,0 +1,10 @@ +realsr: + scale: 4 + channel: 3 + nf: 64 + nb: 23 + pixel_weight: !!float 1 + feature_weight: !!float 0 + gan_weight: !!float 0 + +batch_shape: [16, 3, 64, 64] diff --git a/Train/par/pytorch/srmd.yml b/Train/par/pytorch/srmd.yml index ad4aa73..239ffac 100644 --- a/Train/par/pytorch/srmd.yml +++ b/Train/par/pytorch/srmd.yml @@ -2,12 +2,14 @@ srmd: scale: 4 channel: 3 + layers: 12 + filters: 128 degradation: - kernel_type: 'isotropic' # isotropic or anisotropic - l1: 2.0 # scaling of eigen values on base 0. [0.1, 10] - l2: 2.0 # scaling of eigen values on base 1. [0.1, l1] + kernel_type: 'anisotropic' # isotropic or anisotropic + l1: 0.1 # scaling of eigen values on base 0. [0.1, 10] + l2: 0.1 # scaling of eigen values on base 1. [0.1, l1] theta: 0.0 # rotation angle (rad) of the kernel. [0, pi] - noise: 5.0 # noise stddev (0, 75] + noise: 5 # noise stddev (0, 75] batch_shape: [16, 3, 64, 64] lr: 1.0e-4 diff --git a/VSR/Backend/Torch/Framework/Environment.py b/VSR/Backend/Torch/Framework/Environment.py index 74cfce8..cf1ccc2 100644 --- a/VSR/Backend/Torch/Framework/Environment.py +++ b/VSR/Backend/Torch/Framework/Environment.py @@ -45,6 +45,7 @@ def __init__(self, model, work_dir=None): self._saved = Path(work_dir) / 'save' self._logd = Path(work_dir) / 'log' self._restored = False + self.last_epoch = 0 def _startup(self): if isinstance(self._saved, Path): @@ -125,7 +126,7 @@ def _save_model(self, step): def _restore(self, epoch=None, map_location=None): # restore graph - if self._restored: + if self._restored or self.model.loaded: return self.last_epoch self.last_epoch = self._restore_model(epoch, map_location=map_location) self._restored = True diff --git a/VSR/Backend/Torch/Models/Arch.py b/VSR/Backend/Torch/Models/Arch.py deleted file mode 100644 index aaa3f81..0000000 --- a/VSR/Backend/Torch/Models/Arch.py +++ /dev/null @@ -1,393 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. 
-# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/3 下午5:10 - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from VSR.Util.Utility import to_list - - -class EasyConv2d(nn.Module): - def __init__(self, in_channels, out_channels, kernel_size, - stride=1, padding='same', dilation=1, groups=1, - activation=None, use_bias=True, use_bn=False, use_sn=False): - super(EasyConv2d, self).__init__() - assert padding.lower() in ('same', 'valid') - if padding == 'same': - padding_ = (kernel_size - 1) // 2 - else: - padding_ = 0 - net = [nn.Conv2d(in_channels, out_channels, kernel_size, stride, - padding_, dilation, groups, use_bias)] - if use_sn: - net[0] = nn.utils.spectral_norm(net[0]) - if use_bn: - net += [nn.BatchNorm2d(out_channels)] - if activation: - net += [Activation(activation, in_place=True)] - self.body = nn.Sequential(*net) - - def forward(self, x): - return self.body(x) - - def initialize_(self, kernel, bias=None): - """initialize the convolutional weights from external sources - - Args: - kernel: kernel weight. Shape=[OUT, IN, K, K] - bias: bias weight. Shape=[OUT] - """ - - dtype = self.body[0].weight.dtype - device = self.body[0].weight.device - kernel = torch.tensor(kernel, dtype=dtype, device=device, - requires_grad=True) - assert kernel.shape == self.body[0].weight.shape, "Wrong kernel shape!" - if bias is not None: - bias = torch.tensor(bias, dtype=dtype, device=device, requires_grad=True) - assert bias.shape == self.body[0].bias.shape, "Wrong bias shape!" - self.body[0].weight.data.copy_(kernel) - self.body[0].bias.data.copy_(bias) - - -class RB(nn.Module): - def __init__(self, channels, kernel_size, activation=None, use_bias=True, - use_bn=False, use_sn=False, act_first=None): - super(RB, self).__init__() - in_c, out_c = to_list(channels, 2) - conv1 = nn.Conv2d( - in_c, out_c, kernel_size, 1, kernel_size // 2, bias=use_bias) - conv2 = nn.Conv2d( - out_c, out_c, kernel_size, 1, kernel_size // 2, bias=use_bias) - if use_sn: - conv1 = nn.utils.spectral_norm(conv1) - conv2 = nn.utils.spectral_norm(conv2) - net = [conv1, Activation(activation, in_place=True), conv2] - if use_bn: - net.insert(1, nn.BatchNorm2d(out_c)) - if act_first: - net = [nn.BatchNorm2d(in_c), Activation(activation, in_place=True)] + \ - net - else: - net.append(nn.BatchNorm2d(out_c)) - self.body = nn.Sequential(*net) - if in_c != out_c: - self.shortcut = nn.Conv2d(in_c, out_c, 1) - - def forward(self, x): - out = self.body(x) - if hasattr(self, 'shortcut'): - sc = self.shortcut(x) - return out + sc - return out + x - - -class Rdb(nn.Module): - def __init__(self, channels, depth=3, scaling=1.0, name='Rdb', **kwargs): - super(Rdb, self).__init__() - self.name = name - self.depth = depth - self.scaling = scaling - in_c, out_c = to_list(channels, 2) - ks = kwargs.get('kernel_size', 3) - stride = kwargs.get('stride', 1) - padding = kwargs.get('padding', ks // 2) - dilation = kwargs.get('dilation', 1) - group = kwargs.get('group', 1) - bias = kwargs.get('bias', True) - act = kwargs.get('activation', 'relu') - for i in range(depth): - conv = nn.Conv2d( - in_c + out_c * i, out_c, ks, stride, padding, dilation, group, bias) - if i < depth - 1: # no activation after last layer - conv = nn.Sequential(conv, Activation(act)) - setattr(self, f'conv_{i}', conv) - - def forward(self, inputs): - fl = [inputs] - for i in range(self.depth): - conv = getattr(self, f'conv_{i}') - fl.append(conv(torch.cat(fl, dim=1))) - return fl[-1] * self.scaling + inputs - - def extra_repr(self): - 
return f"{self.name}: depth={self.depth}, scaling={self.scaling}" - - -class Rcab(nn.Module): - def __init__(self, channels, ratio=16, name='RCAB', **kwargs): - super(Rcab, self).__init__() - self.name = name - self.ratio = ratio - in_c, out_c = to_list(channels, 2) - ks = kwargs.get('kernel_size', 3) - padding = kwargs.get('padding', ks // 2) - group = kwargs.get('group', 1) - bias = kwargs.get('bias', True) - self.c1 = nn.Sequential( - nn.Conv2d(in_c, out_c, ks, 1, padding, 1, group, bias), - nn.ReLU(True)) - self.c2 = nn.Conv2d(out_c, out_c, ks, 1, padding, 1, group, bias) - self.c3 = nn.Sequential( - nn.Conv2d(out_c, out_c // ratio, 1, groups=group, bias=bias), - nn.ReLU(True)) - self.c4 = nn.Sequential( - nn.Conv2d(out_c // ratio, in_c, 1, groups=group, bias=bias), - nn.Sigmoid()) - self.pooling = nn.AdaptiveAvgPool2d(1) - - def forward(self, inputs): - x = self.c1(inputs) - y = self.c2(x) - x = self.pooling(y) - x = self.c3(x) - x = self.c4(x) - y = x * y - return inputs + y - - def extra_repr(self): - return f"{self.name}: ratio={self.ratio}" - - -class CascadeRdn(nn.Module): - def __init__(self, channels, depth=3, use_ca=False, name='CascadeRdn', - **kwargs): - super(CascadeRdn, self).__init__() - self.name = name - self.depth = to_list(depth, 2) - self.ca = use_ca - in_c, out_c = to_list(channels, 2) - for i in range(self.depth[0]): - setattr(self, f'conv11_{i}', nn.Conv2d(in_c + out_c * (i + 1), out_c, 1)) - setattr(self, f'rdn_{i}', Rdb(channels, self.depth[1], **kwargs)) - if use_ca: - setattr(self, f'rcab_{i}', Rcab(channels)) - - def forward(self, inputs): - fl = [inputs] - x = inputs - for i in range(self.depth[0]): - rdn = getattr(self, f'rdn_{i}') - x = rdn(x) - if self.ca: - rcab = getattr(self, f'rcab_{i}') - x = rcab(x) - fl.append(x) - c11 = getattr(self, f'conv11_{i}') - x = c11(torch.cat(fl, dim=1)) - - return x - - def extra_repr(self): - return f"{self.name}: depth={self.depth}, ca={self.ca}" - - -class Activation(nn.Module): - def __init__(self, name, *args, **kwargs): - super(Activation, self).__init__() - if name is None: - self.f = lambda t: t - self.name = name.lower() - in_place = kwargs.get('in_place', True) - if self.name == 'relu': - self.f = nn.ReLU(in_place) - elif self.name == 'prelu': - self.f = nn.PReLU() - elif self.name in ('lrelu', 'leaky', 'leakyrelu'): - self.f = nn.LeakyReLU(*args, inplace=in_place) - elif self.name == 'tanh': - self.f = nn.Tanh() - elif self.name == 'sigmoid': - self.f = nn.Sigmoid() - - def forward(self, x): - return self.f(x) - - -class _UpsampleNearest(nn.Module): - def __init__(self, scale): - super(_UpsampleNearest, self).__init__() - self.scale = scale - - def forward(self, x, scale=None): - scale = scale or self.scale - return F.interpolate(x, scale_factor=scale) - - -class _UpsampleLinear(nn.Module): - def __init__(self, scale): - super(_UpsampleLinear, self).__init__() - self._mode = ('linear', 'bilinear', 'trilinear') - self.scale = scale - - def forward(self, x, scale=None): - scale = scale or self.scale - mode = self._mode[x.dim() - 3] - return F.interpolate(x, scale_factor=scale, mode=mode, align_corners=False) - - -class Upsample(nn.Module): - def __init__(self, channel, scale, method='ps', name='Upsample', **kwargs): - super(Upsample, self).__init__() - self.name = name - self.channel = channel - self.scale = scale - self.method = method.lower() - self.kernel_size = kwargs.get('kernel_size', 3) - - _allowed_methods = ('ps', 'nearest', 'deconv', 'linear') - assert self.method in _allowed_methods - act = 
kwargs.get('activation') - - samplers = [] - while scale > 1: - if scale % 2 == 1 or scale == 2: - samplers.append(self.upsampler(self.method, scale)) - break - else: - samplers.append(self.upsampler(self.method, 2, act)) - scale //= 2 - self.body = nn.Sequential(*samplers) - - def upsampler(self, method, scale, activation=None): - body = [] - k = self.kernel_size - if method == 'ps': - p = k // 2 # padding - s = 1 # strides - body = [nn.Conv2d(self.channel, self.channel * scale * scale, k, s, p), - nn.PixelShuffle(scale)] - if activation: - body.insert(1, Activation(activation)) - if method == 'deconv': - q = k % 2 # output padding - p = (k + q) // 2 - 1 # padding - s = scale # strides - body = [nn.ConvTranspose2d(self.channel, self.channel, k, s, p, q)] - if activation: - body.insert(1, Activation(activation)) - if method == 'nearest': - body = [_UpsampleNearest(scale), - EasyConv2d(self.channel, self.channel, k, activation=activation)] - if method == 'linear': - body = [_UpsampleLinear(scale), - EasyConv2d(self.channel, self.channel, k, activation=activation)] - return nn.Sequential(*body) - - def forward(self, inputs): - return self.body(inputs) - - def extra_repr(self): - return f"{self.name}: scale={self.scale}" - - -class SpaceToDim(nn.Module): - def __init__(self, scale_factor, dims=(-2, -1), dim=0): - super(SpaceToDim, self).__init__() - self.scale_factor = scale_factor - self.dims = dims - self.dim = dim - - def forward(self, x): - _shape = list(x.shape) - shape = _shape.copy() - dims = [x.dim() + self.dims[0] if self.dims[0] < 0 else self.dims[0], - x.dim() + self.dims[1] if self.dims[1] < 0 else self.dims[1]] - dims = [max(abs(dims[0]), abs(dims[1])), - min(abs(dims[0]), abs(dims[1]))] - if self.dim in dims: - raise RuntimeError("Integrate dimension can't be space dimension!") - shape[dims[0]] //= self.scale_factor - shape[dims[1]] //= self.scale_factor - shape.insert(dims[0] + 1, self.scale_factor) - shape.insert(dims[1] + 1, self.scale_factor) - dim = self.dim if self.dim < dims[1] else self.dim + 1 - dim = dim if dim <= dims[0] else dim + 1 - x = x.reshape(*shape) - perm = [dim, dims[1] + 1, dims[0] + 2] - perm = [i for i in range(min(perm))] + perm - perm.extend((i for i in range(x.dim()) if i not in perm)) - x = x.permute(*perm) - shape = _shape - shape[self.dim] *= self.scale_factor ** 2 - shape[self.dims[0]] //= self.scale_factor - shape[self.dims[1]] //= self.scale_factor - return x.reshape(*shape) - - def extra_repr(self): - return f'scale_factor={self.scale_factor}' - - -class SpaceToDepth(nn.Module): - def __init__(self, block_size): - super(SpaceToDepth, self).__init__() - self.body = SpaceToDim(block_size, dim=1) - - def forward(self, x): - return self.body(x) - - -class SpaceToBatch(nn.Module): - def __init__(self, block_size): - super(SpaceToBatch, self).__init__() - self.body = SpaceToDim(block_size, dim=0) - - def forward(self, x): - return self.body(x) - - -class CBAM(nn.Module): - """Convolutional Block Attention Module (ECCV 18) - - CA: channel attention module - - SA: spatial attention module - - Args: - channels: input channel of tensors - channel_reduction: reduction ratio in `CA` - spatial_first: put SA ahead of CA (default: CA->SA) - """ - - class CA(nn.Module): - def __init__(self, channels, ratio=16): - super(CBAM.CA, self).__init__() - self.max_pool = nn.AdaptiveMaxPool2d(1) - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.mlp = nn.Sequential( - nn.Conv2d(channels, channels // ratio, 1), - nn.ReLU(), - nn.Conv2d(channels // ratio, channels, 1)) - - 
def forward(self, x): - maxpool = self.max_pool(x) - avgpool = self.avg_pool(x) - att = F.sigmoid(self.mlp(maxpool) + self.mlp(avgpool)) - return att * x - - class SA(nn.Module): - def __init__(self, kernel_size=7): - super(CBAM.SA, self).__init__() - self.conv = nn.Conv2d(2, 1, kernel_size, 1, kernel_size // 2) - - def forward(self, x): - max_c_pool = x.max(dim=1, keepdim=True) - avg_c_pool = x.mean(dim=1, keepdim=True) - y = torch.cat([max_c_pool, avg_c_pool], dim=1) - att = F.sigmoid(self.conv(y)) - return att * x - - def __init__(self, channels, channel_reduction=16, spatial_first=None): - super(CBAM, self).__init__() - self.channel_attention = CBAM.CA(channels, ratio=channel_reduction) - self.spatial_attention = CBAM.SA(7) - self.spatial_first = spatial_first - - def forward(self, inputs): - if self.spatial_first: - x = self.spatial_attention(inputs) - return self.channel_attention(x) - else: - x = self.channel_attention(inputs) - return self.spatial_attention(x) diff --git a/VSR/Backend/Torch/Models/Carn.py b/VSR/Backend/Torch/Models/Carn.py index cb9b867..489e0f4 100644 --- a/VSR/Backend/Torch/Models/Carn.py +++ b/VSR/Backend/Torch/Models/Carn.py @@ -3,13 +3,140 @@ # Email: wenyi.tang@intel.com # Update Date: 2019 - 3 - 13 +import logging + import torch +import torch.nn as nn import torch.nn.functional as F from .Model import SuperResolution -from .carn import carn, carn_m +from .Ops.Blocks import EasyConv2d, MeanShift, RB +from .Ops.Scale import MultiscaleUpsample, Upsample from ..Util import Metrics +_logger = logging.getLogger("VSR.CARN") +_logger.info("LICENSE: CARN is implemented by Namhyuk Ahn. " + "@nmhkahn https://github.com/nmhkahn/CARN-pytorch") + + +class EResidualBlock(nn.Module): + def __init__(self, in_channels, out_channels, group): + super(EResidualBlock, self).__init__() + + self.body = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, 1, 1, groups=group), + nn.ReLU(inplace=True), + nn.Conv2d(out_channels, out_channels, 3, 1, 1, groups=group), + nn.ReLU(inplace=True), + nn.Conv2d(out_channels, out_channels, 1, 1, 0), + ) + + def forward(self, x): + out = self.body(x) + return out + x + + +class ResidualBlock(nn.Module): + def __init__(self, + in_channels, out_channels): + super(ResidualBlock, self).__init__() + + self.body = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, 1, 1), + nn.ReLU(inplace=True), + nn.Conv2d(out_channels, out_channels, 3, 1, 1), + ) + + def forward(self, x): + out = self.body(x) + out = F.relu(out + x) + return out + + +class Block(nn.Module): + def __init__(self, in_channels, out_channels, group=1): + """ CARN cascading residual block + """ + super(Block, self).__init__() + if group == 1: + self.b1 = RB(in_channels, out_channels, activation='relu') + self.b2 = RB(out_channels, out_channels, activation='relu') + self.b3 = RB(out_channels, out_channels, activation='relu') + elif group > 1: + self.b1 = EResidualBlock(64, 64, group=group) + self.b2 = self.b3 = self.b1 + self.c1 = EasyConv2d(in_channels + out_channels, out_channels, 1, + activation='relu') + self.c2 = EasyConv2d(in_channels + out_channels * 2, out_channels, 1, + activation='relu') + self.c3 = EasyConv2d(in_channels + out_channels * 3, out_channels, 1, + activation='relu') + + def forward(self, x): + c0 = o0 = x + + b1 = F.relu(self.b1(o0)) + c1 = torch.cat([c0, b1], dim=1) + o1 = self.c1(c1) + + b2 = F.relu(self.b2(o1)) + c2 = torch.cat([c1, b2], dim=1) + o2 = self.c2(c2) + + b3 = F.relu(self.b3(o2)) + c3 = torch.cat([c2, b3], dim=1) + o3 = self.c3(c3) + + return 
o3 + + +class Net(nn.Module): + def __init__(self, scale, multi_scale=None, group=1): + super(Net, self).__init__() + + self.sub_mean = MeanShift((0.4488, 0.4371, 0.4040), sub=True) + self.add_mean = MeanShift((0.4488, 0.4371, 0.4040), sub=False) + + self.entry = nn.Conv2d(3, 64, 3, 1, 1) + + self.b1 = Block(64, 64, group=group) + self.b2 = Block(64, 64, group=group) + self.b3 = Block(64, 64, group=group) + self.c1 = EasyConv2d(64 * 2, 64, 1, activation='relu') + self.c2 = EasyConv2d(64 * 3, 64, 1, activation='relu') + self.c3 = EasyConv2d(64 * 4, 64, 1, activation='relu') + + if multi_scale: + self.upsample = MultiscaleUpsample(64, scales=(2, 3, 4), group=group, + activation='relu') + else: + self.upsample = Upsample(64, scale=scale, group=group, activation='relu') + self.exit = nn.Conv2d(64, 3, 3, 1, 1) + + def forward(self, x, scale=None): + x = self.sub_mean(x) + x = self.entry(x) + c0 = o0 = x + + b1 = self.b1(o0) + c1 = torch.cat([c0, b1], dim=1) + o1 = self.c1(c1) + + b2 = self.b2(o1) + c2 = torch.cat([c1, b2], dim=1) + o2 = self.c2(c2) + + b3 = self.b3(o2) + c3 = torch.cat([c2, b3], dim=1) + o3 = self.c3(c3) + + out = self.upsample(o3, scale=scale) + + out = self.exit(out) + out = self.add_mean(out) + + return out + class CARN(SuperResolution): def __init__(self, scale, channel, **kwargs): @@ -17,10 +144,7 @@ def __init__(self, scale, channel, **kwargs): group = kwargs.get('group', 1) ms = kwargs.get('multi_scale', 0) self.clip = kwargs.get('clip', 10) - if group > 1: - self.carn = carn_m.Net(group=group, scale=scale, multi_scale=ms) - else: - self.carn = carn.Net(scale=scale, multi_scale=ms) + self.carn = Net(group=group, scale=scale, multi_scale=ms) self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) def train(self, inputs, labels, learning_rate=None): diff --git a/VSR/Backend/Torch/Models/Classic.py b/VSR/Backend/Torch/Models/Classic.py index ebbb102..477a014 100644 --- a/VSR/Backend/Torch/Models/Classic.py +++ b/VSR/Backend/Torch/Models/Classic.py @@ -5,14 +5,10 @@ import torch import torch.nn as nn -import torch.nn.functional as F -from .Arch import EasyConv2d, RB -from .Loss import VggFeatureLoss -from .Model import SuperResolution -from ..Util import Metrics +from .Optim.SISR import PerceptualOptimizer +from .Ops.Blocks import EasyConv2d, RB from ..Util.Utility import upsample -from ..Framework.Summary import get_writer class Espcn(nn.Module): @@ -34,9 +30,9 @@ class Srcnn(nn.Module): def __init__(self, channel, filters=(9, 5, 5)): super(Srcnn, self).__init__() self.net = nn.Sequential( - EasyConv2d(channel, 64, filters[0], activation='relu'), - EasyConv2d(64, 32, filters[1], activation='relu'), - EasyConv2d(32, channel, filters[2], activation=None)) + EasyConv2d(channel, 64, filters[0], activation='relu'), + EasyConv2d(64, 32, filters[1], activation='relu'), + EasyConv2d(32, channel, filters[2], activation=None)) def forward(self, x): return self.net(x) @@ -74,11 +70,11 @@ def __init__(self, scale, channel, n_recur, filters): super(Drcn, self).__init__() self.entry = nn.Sequential( - EasyConv2d(channel, filters, 3, activation='relu'), - EasyConv2d(filters, filters, 3, activation='relu')) + EasyConv2d(channel, filters, 3, activation='relu'), + EasyConv2d(filters, filters, 3, activation='relu')) self.exit = nn.Sequential( - EasyConv2d(filters, filters, 3, activation='relu'), - EasyConv2d(filters, channel, 3)) + EasyConv2d(filters, filters, 3, activation='relu'), + EasyConv2d(filters, channel, 3)) self.conv = EasyConv2d(filters, filters, 3, activation='relu') 
self.output_weights = Parameter(torch.empty(n_recur + 1)) torch.nn.init.uniform_(self.output_weights, 0, 1) @@ -105,7 +101,7 @@ def __init__(self, channel, n_ru, n_rb, filters): setattr(self, f'entry{i}', EasyConv2d(filters, filters, 3, activation='relu')) self.n_rb = n_rb - self.rb = RB(filters, 3, activation='relu') + self.rb = RB(filters, kernel_size=3, activation='relu') self.n_ru = n_ru self.exit = EasyConv2d(filters, channel, 3) @@ -119,115 +115,10 @@ def forward(self, x): return self.exit(x) -class PerceptualOptimizer(SuperResolution): - def __init__(self, scale, channel, image_weight=1, feature_weight=0, - **kwargs): - super(PerceptualOptimizer, self).__init__(scale, channel, **kwargs) - if feature_weight > 0: - # tricks: do not save weights of vgg - self.feature = [VggFeatureLoss(['block3_conv4'], True)] - self.w = [image_weight, feature_weight] - self.clip = kwargs.get('clip') - self.opt_config = kwargs.get('opt') - - def get_opt(self, params, lr): - if self.opt_config is None: - return torch.optim.Adam(params, lr=lr) - if self.opt_config.get('name') == 'Adadelta': - kwargs = self.opt_config - kwargs.pop('name') - return torch.optim.Adadelta(params, lr=lr, **kwargs) - elif self.opt_config.get('name') == 'Adagrad': - kwargs = self.opt_config - kwargs.pop('name') - return torch.optim.Adagrad(params, lr=lr, **kwargs) - elif self.opt_config.get('name') == 'Adam': - kwargs = self.opt_config - kwargs.pop('name') - return torch.optim.Adam(params, lr=lr, **kwargs) - elif self.opt_config.get('name') == 'SparseAdam': - kwargs = self.opt_config - kwargs.pop('name') - return torch.optim.SparseAdam(params, lr=lr, **kwargs) - elif self.opt_config.get('name') == 'Adamax': - kwargs = self.opt_config - kwargs.pop('name') - return torch.optim.Adamax(params, lr=lr, **kwargs) - elif self.opt_config.get('name') == 'ASGD': - kwargs = self.opt_config - kwargs.pop('name') - return torch.optim.ASGD(params, lr=lr, **kwargs) - elif self.opt_config.get('name') == 'SGD': - kwargs = self.opt_config - kwargs.pop('name') - return torch.optim.SGD(params, lr=lr, **kwargs) - elif self.opt_config.get('name') == 'LBFGS': - kwargs = self.opt_config - kwargs.pop('name') - return torch.optim.LBFGS(params, lr=lr, **kwargs) - elif self.opt_config.get('name') == 'Rprop': - kwargs = self.opt_config - kwargs.pop('name') - return torch.optim.Rprop(params, lr=lr, **kwargs) - elif self.opt_config.get('name') == 'RMSprop': - kwargs = self.opt_config - kwargs.pop('name') - return torch.optim.RMSprop(params, lr=lr, **kwargs) - - def cuda(self): - super(PerceptualOptimizer, self).cuda() - if self.w[1] > 0: - self.feature[0].cuda() - - def train(self, inputs, labels, learning_rate=None): - sr = self.fn(inputs[0]) - image_loss = F.mse_loss(sr, labels[0]) - loss = image_loss * self.w[0] - if self.w[1] > 0: - self.feature[0].eval() - # sr = self.fn(inputs[0]) - feat_fake = self.feature[0](sr)[0] - feat_real = self.feature[0](labels[0])[0] - feature_loss = F.mse_loss(feat_fake, feat_real) - loss += feature_loss * self.w[1] - opt = list(self.opts.values())[0] - if learning_rate: - for param_group in opt.param_groups: - param_group["lr"] = learning_rate - opt.zero_grad() - loss.backward() - if self.clip: - clip = self.clip / learning_rate - nn.utils.clip_grad_norm_(self.trainable_variables(), clip) - opt.step() - return { - 'loss': loss.detach().cpu().numpy(), - 'image': image_loss.detach().cpu().numpy(), - } - - def eval(self, inputs, labels=None, **kwargs): - metrics = {} - sr = self.fn(inputs[0]).detach().cpu() - bi = 
upsample(inputs[0], self.scale).detach().cpu() - if labels is not None: - metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy()) - writer = get_writer(self.name) - if writer is not None: - step = kwargs.get('epoch') - writer.image('sr', sr.clamp(0, 1), max=1, step=step) - writer.image('bicubic', bi.clamp(0, 1), max=1, step=step) - writer.image('gt', labels[0], max=1, step=step) - return [sr.numpy()], metrics - - def fn(self, tensor): - raise NotImplementedError - - class ESPCN(PerceptualOptimizer): def __init__(self, scale, channel, **kwargs): - super(ESPCN, self).__init__(scale, channel, **kwargs) self.espcn = Espcn(channel, scale) - self.opt = self.get_opt(self.trainable_variables(), 1e-4) + super(ESPCN, self).__init__(scale, channel, **kwargs) def fn(self, tensor): return self.espcn(tensor * 2 - 1) / 2 + 0.5 @@ -235,10 +126,9 @@ def fn(self, tensor): class SRCNN(PerceptualOptimizer): def __init__(self, scale, channel, **kwargs): - super(SRCNN, self).__init__(scale, channel, **kwargs) filters = kwargs.get('filters', (9, 5, 5)) self.srcnn = Srcnn(channel, filters) - self.opt = self.get_opt(self.trainable_variables(), 1e-4) + super(SRCNN, self).__init__(scale, channel, **kwargs) def fn(self, tensor): x = upsample(tensor, self.scale) @@ -247,10 +137,9 @@ def fn(self, tensor): class VDSR(PerceptualOptimizer): def __init__(self, scale, channel, **kwargs): - super(VDSR, self).__init__(scale, channel, **kwargs) layers = kwargs.get('layers', 20) self.vdsr = Vdsr(channel, layers) - self.opt = self.get_opt(self.trainable_variables(), 1e-4) + super(VDSR, self).__init__(scale, channel, **kwargs) def fn(self, tensor): x = upsample(tensor, self.scale) @@ -258,14 +147,13 @@ def fn(self, tensor): class DNCNN(PerceptualOptimizer): - def __init__(self, channel, scale, noise, **kwargs): - super(DNCNN, self).__init__(1, channel, **kwargs) + def __init__(self, channel, noise, **kwargs): layers = kwargs.get('layers', 15) bn = kwargs.get('bn', True) self.dncnn = DnCnn(channel, layers, bn) - self.opt = self.get_opt(self.trainable_variables(), 1e-4) self.noise = noise / 255 self.norm = torch.distributions.normal.Normal(0, self.noise) + super(DNCNN, self).__init__(1, channel, **kwargs) def fn(self, tensor): if self.noise > 0: @@ -277,9 +165,8 @@ def fn(self, tensor): class DRCN(PerceptualOptimizer): def __init__(self, scale, channel, n_recur, **kwargs): - super(DRCN, self).__init__(scale, channel, **kwargs) self.drcn = Drcn(scale, channel, n_recur, 128) - self.opt = self.get_opt(self.trainable_variables(), 1e-4) + super(DRCN, self).__init__(scale, channel, **kwargs) def fn(self, tensor): return self.drcn(tensor) @@ -287,9 +174,8 @@ def fn(self, tensor): class DRRN(PerceptualOptimizer): def __init__(self, scale, channel, n_rb, n_ru, **kwargs): - super(DRRN, self).__init__(scale, channel, **kwargs) self.drrn = Drrn(channel, n_ru, n_rb, 128) - self.opt = self.get_opt(self.trainable_variables(), 1e-4) + super(DRRN, self).__init__(scale, channel, **kwargs) def fn(self, tensor): x = upsample(tensor, self.scale) diff --git a/VSR/Backend/Torch/Models/Contrib/__init__.py b/VSR/Backend/Torch/Models/Contrib/__init__.py new file mode 100644 index 0000000..d5b89eb --- /dev/null +++ b/VSR/Backend/Torch/Models/Contrib/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2017-2020 Wenyi Tang. 
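Note the reordering in Classic.py above: each subclass now builds its network first and calls `super().__init__` last. A minimal sketch (hypothetical `TinySR`, not part of the patch; it assumes, as the reordering suggests, that `PerceptualOptimizer.__init__` in `Optim/SISR.py` creates the optimizer from the parameters registered so far):

import torch.nn as nn

class TinySR(PerceptualOptimizer):
    def __init__(self, scale, channel, **kwargs):
        # the module must exist before super().__init__ so the base class
        # can collect its parameters when it builds the optimizer
        self.body = nn.Conv2d(channel, channel, 3, padding=1)
        super(TinySR, self).__init__(scale, channel, **kwargs)

    def fn(self, tensor):
        # PerceptualOptimizer drives training and evaluation through this hook
        return self.body(tensor)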
+# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 16 + diff --git a/VSR/Backend/Torch/Models/ntire19/__init__.py b/VSR/Backend/Torch/Models/Contrib/ntire19/__init__.py similarity index 100% rename from VSR/Backend/Torch/Models/ntire19/__init__.py rename to VSR/Backend/Torch/Models/Contrib/ntire19/__init__.py diff --git a/VSR/Backend/Torch/Models/ntire19/denoise.py b/VSR/Backend/Torch/Models/Contrib/ntire19/denoise.py similarity index 98% rename from VSR/Backend/Torch/Models/ntire19/denoise.py rename to VSR/Backend/Torch/Models/Contrib/ntire19/denoise.py index 7b6b85c..24d1995 100644 --- a/VSR/Backend/Torch/Models/ntire19/denoise.py +++ b/VSR/Backend/Torch/Models/Contrib/ntire19/denoise.py @@ -8,7 +8,8 @@ import torch import torch.nn as nn -from ..Arch import Activation, Rdb, SpaceToDepth, CBAM +from VSR.Backend.Torch.Models.Ops.Blocks import Activation, CBAM, Rdb +from VSR.Backend.Torch.Models.Ops.Scale import SpaceToDepth _logger = logging.getLogger("VSR.NTIRE2019.Denoise") diff --git a/VSR/Backend/Torch/Models/ntire19/edrn.py b/VSR/Backend/Torch/Models/Contrib/ntire19/edrn.py similarity index 100% rename from VSR/Backend/Torch/Models/ntire19/edrn.py rename to VSR/Backend/Torch/Models/Contrib/ntire19/edrn.py diff --git a/VSR/Backend/Torch/Models/ntire19/frn.py b/VSR/Backend/Torch/Models/Contrib/ntire19/frn.py similarity index 99% rename from VSR/Backend/Torch/Models/ntire19/frn.py rename to VSR/Backend/Torch/Models/Contrib/ntire19/frn.py index 399a553..01b29cf 100644 --- a/VSR/Backend/Torch/Models/ntire19/frn.py +++ b/VSR/Backend/Torch/Models/Contrib/ntire19/frn.py @@ -5,7 +5,7 @@ import torch.nn as nn -from ..edsr import common +from VSR.Backend.Torch.Models.Edsr import common ## Channel Attention (CA) Layer diff --git a/VSR/Backend/Torch/Models/ntire19/ran2.py b/VSR/Backend/Torch/Models/Contrib/ntire19/ran2.py similarity index 100% rename from VSR/Backend/Torch/Models/ntire19/ran2.py rename to VSR/Backend/Torch/Models/Contrib/ntire19/ran2.py diff --git a/VSR/Backend/Torch/Models/Contrib/ntire20/__init__.py b/VSR/Backend/Torch/Models/Contrib/ntire20/__init__.py new file mode 100644 index 0000000..25d400e --- /dev/null +++ b/VSR/Backend/Torch/Models/Contrib/ntire20/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 5 - 28 + +import logging +_logger = logging.getLogger("VSR.NTIRE2020") +_logger.info("Top-ranked models in NTIRE 2020. " + "Real World Super-Resolution") diff --git a/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/__init__.py b/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/__init__.py new file mode 100644 index 0000000..d739b85 --- /dev/null +++ b/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 5 - 28 + +import logging +_logger = logging.getLogger("VSR.RWSR") +_logger.info("LICENSE: RealSR is implemented by Xiaozhong Ji. " + "@xiaozhongji https://github.com/jixiaozhong/RealSR") diff --git a/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/__init__.py b/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/__init__.py new file mode 100644 index 0000000..960152e --- /dev/null +++ b/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/__init__.py @@ -0,0 +1,66 @@ +# Copyright (c) 2017-2020 Apache 2.0. 
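+# Factory helpers for RealSR: define_G, define_D and define_F build the generator, discriminator and VGG feature extractor.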
+# Author: Xiaozhong Ji +# Update: 2020 - 5 - 28 + +from .discriminator import ( + Discriminator_VGG_128, Discriminator_VGG_256, Discriminator_VGG_512, + NLayerDiscriminator, VGGFeatureExtractor +) +from .network import RRDBNet + + +#################### +# define network +#################### + +def define_G(which_model='RRDBNet', **opt): + """ + Generator + :param which_model: + :param opt: + :return: + """ + + if which_model == 'RRDBNet': + return RRDBNet(in_nc=opt['in_nc'], out_nc=opt['out_nc'], nf=opt['nf'], + nb=opt['nb']) + else: + raise NotImplementedError(f'Generator model [{which_model}] not recognized') + + +def define_D(which_model='NLayerDiscriminator', **opt): + """ + Discriminator + :param which_model: + :param opt: + :return: + """ + + if which_model == 'discriminator_vgg_128': + netD = Discriminator_VGG_128(in_nc=opt['in_nc'], nf=opt['nf']) + elif which_model == 'discriminator_vgg_256': + netD = Discriminator_VGG_256(in_nc=opt['in_nc'], nf=opt['nf']) + elif which_model == 'discriminator_vgg_512': + netD = Discriminator_VGG_512(in_nc=opt['in_nc'], nf=opt['nf']) + elif which_model == 'NLayerDiscriminator': + netD = NLayerDiscriminator(input_nc=opt['in_nc'], ndf=opt['nf'], + n_layers=opt['nlayer']) + else: + raise NotImplementedError( + f'Discriminator model [{which_model}] not recognized') + return netD + + +def define_F(use_bn=False): + """ + Define Network used for Perceptual Loss + PyTorch pre-trained VGG19-54, before ReLU. + :param use_bn: + :return: + """ + + feature_layer = 49 if use_bn else 34 + netF = VGGFeatureExtractor(feature_layer=feature_layer, use_bn=use_bn, + use_input_norm=True) + netF.eval() # No need to train + return netF diff --git a/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/discriminator.py b/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/discriminator.py new file mode 100644 index 0000000..216cfa3 --- /dev/null +++ b/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/discriminator.py @@ -0,0 +1,279 @@ +# Copyright (c) 2017-2020 Apache 2.0. 
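+# PatchGAN (NLayerDiscriminator) and VGG-style discriminators, plus the VGG19 feature extractor used for perceptual loss.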
+# Author: Xiaozhong Ji +# Update: 2020 - 5 - 28 + +import torch +import torch.nn as nn +import torchvision + + +class NLayerDiscriminator(nn.Module): + """Defines a PatchGAN discriminator""" + + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d): + """Construct a PatchGAN discriminator + + Parameters: + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the last conv layer + n_layers (int) -- the number of conv layers in the discriminator + norm_layer -- normalization layer + """ + super(NLayerDiscriminator, self).__init__() + use_bias = False + kw = 4 + padw = 1 + sequence = [ + nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), + nn.LeakyReLU(0.2, True)] + nf_mult = 1 + nf_mult_prev = 1 + for n in range(1, n_layers): # gradually increase the number of filters + nf_mult_prev = nf_mult + nf_mult = min(2 ** n, 8) + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, + padding=padw, bias=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2, True) + ] + + nf_mult_prev = nf_mult + nf_mult = min(2 ** n_layers, 8) + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, + padding=padw, bias=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2, True) + ] + + sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, + padding=padw)] # output 1 channel prediction map + # TODO + self.model = nn.Sequential(*sequence) + + def forward(self, x): + """Standard forward.""" + return self.model(x) + + +class Discriminator_VGG_128(nn.Module): + def __init__(self, in_nc, nf): + super(Discriminator_VGG_128, self).__init__() + # [64, 128, 128] + self.conv0_0 = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True) + self.conv0_1 = nn.Conv2d(nf, nf, 4, 2, 1, bias=False) + self.bn0_1 = nn.BatchNorm2d(nf, affine=True) + # [64, 64, 64] + self.conv1_0 = nn.Conv2d(nf, nf * 2, 3, 1, 1, bias=False) + self.bn1_0 = nn.BatchNorm2d(nf * 2, affine=True) + self.conv1_1 = nn.Conv2d(nf * 2, nf * 2, 4, 2, 1, bias=False) + self.bn1_1 = nn.BatchNorm2d(nf * 2, affine=True) + # [128, 32, 32] + self.conv2_0 = nn.Conv2d(nf * 2, nf * 4, 3, 1, 1, bias=False) + self.bn2_0 = nn.BatchNorm2d(nf * 4, affine=True) + self.conv2_1 = nn.Conv2d(nf * 4, nf * 4, 4, 2, 1, bias=False) + self.bn2_1 = nn.BatchNorm2d(nf * 4, affine=True) + # [256, 16, 16] + self.conv3_0 = nn.Conv2d(nf * 4, nf * 8, 3, 1, 1, bias=False) + self.bn3_0 = nn.BatchNorm2d(nf * 8, affine=True) + self.conv3_1 = nn.Conv2d(nf * 8, nf * 8, 4, 2, 1, bias=False) + self.bn3_1 = nn.BatchNorm2d(nf * 8, affine=True) + # [512, 8, 8] + self.conv4_0 = nn.Conv2d(nf * 8, nf * 8, 3, 1, 1, bias=False) + self.bn4_0 = nn.BatchNorm2d(nf * 8, affine=True) + self.conv4_1 = nn.Conv2d(nf * 8, nf * 8, 4, 2, 1, bias=False) + self.bn4_1 = nn.BatchNorm2d(nf * 8, affine=True) + + self.linear1 = nn.Linear(512 * 4 * 4, 100) + self.linear2 = nn.Linear(100, 1) + + # activation function + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + def forward(self, x): + fea = self.lrelu(self.conv0_0(x)) + fea = self.lrelu(self.bn0_1(self.conv0_1(fea))) + + fea = self.lrelu(self.bn1_0(self.conv1_0(fea))) + fea = self.lrelu(self.bn1_1(self.conv1_1(fea))) + + fea = self.lrelu(self.bn2_0(self.conv2_0(fea))) + fea = self.lrelu(self.bn2_1(self.conv2_1(fea))) + + fea = self.lrelu(self.bn3_0(self.conv3_0(fea))) + fea = self.lrelu(self.bn3_1(self.conv3_1(fea))) + + fea = self.lrelu(self.bn4_0(self.conv4_0(fea))) + fea = 
self.lrelu(self.bn4_1(self.conv4_1(fea))) + + fea = fea.view(fea.size(0), -1) + fea = self.lrelu(self.linear1(fea)) + out = self.linear2(fea) + return out + + +class Discriminator_VGG_256(nn.Module): + def __init__(self, in_nc, nf): + super(Discriminator_VGG_256, self).__init__() + # [64, 128, 128] + self.conv0_0 = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True) + self.conv0_1 = nn.Conv2d(nf, nf, 4, 2, 1, bias=False) + self.bn0_1 = nn.BatchNorm2d(nf, affine=True) + # [64, 64, 64] + self.conv1_0 = nn.Conv2d(nf, nf * 2, 3, 1, 1, bias=False) + self.bn1_0 = nn.BatchNorm2d(nf * 2, affine=True) + self.conv1_1 = nn.Conv2d(nf * 2, nf * 2, 4, 2, 1, bias=False) + self.bn1_1 = nn.BatchNorm2d(nf * 2, affine=True) + # [128, 32, 32] + self.conv2_0 = nn.Conv2d(nf * 2, nf * 4, 3, 1, 1, bias=False) + self.bn2_0 = nn.BatchNorm2d(nf * 4, affine=True) + self.conv2_1 = nn.Conv2d(nf * 4, nf * 4, 4, 2, 1, bias=False) + self.bn2_1 = nn.BatchNorm2d(nf * 4, affine=True) + # [256, 16, 16] + self.conv3_0 = nn.Conv2d(nf * 4, nf * 8, 3, 1, 1, bias=False) + self.bn3_0 = nn.BatchNorm2d(nf * 8, affine=True) + self.conv3_1 = nn.Conv2d(nf * 8, nf * 8, 4, 2, 1, bias=False) + self.bn3_1 = nn.BatchNorm2d(nf * 8, affine=True) + # [512, 8, 8] + self.conv4_0 = nn.Conv2d(nf * 8, nf * 8, 3, 1, 1, bias=False) + self.bn4_0 = nn.BatchNorm2d(nf * 8, affine=True) + self.conv4_1 = nn.Conv2d(nf * 8, nf * 8, 4, 2, 1, bias=False) + self.bn4_1 = nn.BatchNorm2d(nf * 8, affine=True) + + self.conv5_0 = nn.Conv2d(nf * 8, nf * 8, 3, 1, 1, bias=False) + self.bn5_0 = nn.BatchNorm2d(nf * 8, affine=True) + self.conv5_1 = nn.Conv2d(nf * 8, nf * 8, 4, 2, 1, bias=False) + self.bn5_1 = nn.BatchNorm2d(nf * 8, affine=True) + + self.linear1 = nn.Linear(512 * 4 * 4, 100) + self.linear2 = nn.Linear(100, 1) + + # activation function + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + def forward(self, x): + fea = self.lrelu(self.conv0_0(x)) + fea = self.lrelu(self.bn0_1(self.conv0_1(fea))) + + fea = self.lrelu(self.bn1_0(self.conv1_0(fea))) + fea = self.lrelu(self.bn1_1(self.conv1_1(fea))) + + fea = self.lrelu(self.bn2_0(self.conv2_0(fea))) + fea = self.lrelu(self.bn2_1(self.conv2_1(fea))) + + fea = self.lrelu(self.bn3_0(self.conv3_0(fea))) + fea = self.lrelu(self.bn3_1(self.conv3_1(fea))) + + fea = self.lrelu(self.bn4_0(self.conv4_0(fea))) + fea = self.lrelu(self.bn4_1(self.conv4_1(fea))) + + fea = self.lrelu(self.bn5_0(self.conv5_0(fea))) + fea = self.lrelu(self.bn5_1(self.conv5_1(fea))) + + fea = fea.view(fea.size(0), -1) + fea = self.lrelu(self.linear1(fea)) + out = self.linear2(fea) + return out + + +class Discriminator_VGG_512(nn.Module): + def __init__(self, in_nc, nf): + super(Discriminator_VGG_512, self).__init__() + # [64, 128, 128] + self.conv0_0 = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True) + self.conv0_1 = nn.Conv2d(nf, nf, 4, 2, 1, bias=False) + self.bn0_1 = nn.BatchNorm2d(nf, affine=True) + # [64, 64, 64] + self.conv1_0 = nn.Conv2d(nf, nf * 2, 3, 1, 1, bias=False) + self.bn1_0 = nn.BatchNorm2d(nf * 2, affine=True) + self.conv1_1 = nn.Conv2d(nf * 2, nf * 2, 4, 2, 1, bias=False) + self.bn1_1 = nn.BatchNorm2d(nf * 2, affine=True) + # [128, 32, 32] + self.conv2_0 = nn.Conv2d(nf * 2, nf * 4, 3, 1, 1, bias=False) + self.bn2_0 = nn.BatchNorm2d(nf * 4, affine=True) + self.conv2_1 = nn.Conv2d(nf * 4, nf * 4, 4, 2, 1, bias=False) + self.bn2_1 = nn.BatchNorm2d(nf * 4, affine=True) + # [256, 16, 16] + self.conv3_0 = nn.Conv2d(nf * 4, nf * 8, 3, 1, 1, bias=False) + self.bn3_0 = nn.BatchNorm2d(nf * 8, affine=True) + self.conv3_1 = nn.Conv2d(nf * 8, nf * 
8, 4, 2, 1, bias=False) + self.bn3_1 = nn.BatchNorm2d(nf * 8, affine=True) + # [512, 8, 8] + self.conv4_0 = nn.Conv2d(nf * 8, nf * 8, 3, 1, 1, bias=False) + self.bn4_0 = nn.BatchNorm2d(nf * 8, affine=True) + self.conv4_1 = nn.Conv2d(nf * 8, nf * 8, 4, 2, 1, bias=False) + self.bn4_1 = nn.BatchNorm2d(nf * 8, affine=True) + + self.conv5_0 = nn.Conv2d(nf * 8, nf * 8, 3, 1, 1, bias=False) + self.bn5_0 = nn.BatchNorm2d(nf * 8, affine=True) + self.conv5_1 = nn.Conv2d(nf * 8, nf * 8, 4, 2, 1, bias=False) + self.bn5_1 = nn.BatchNorm2d(nf * 8, affine=True) + + self.conv6_0 = nn.Conv2d(nf * 8, nf * 8, 3, 1, 1, bias=False) + self.bn6_0 = nn.BatchNorm2d(nf * 8, affine=True) + self.conv6_1 = nn.Conv2d(nf * 8, nf * 8, 4, 2, 1, bias=False) + self.bn6_1 = nn.BatchNorm2d(nf * 8, affine=True) + + self.linear1 = nn.Linear(512 * 4 * 4, 100) + self.linear2 = nn.Linear(100, 1) + + # activation function + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + def forward(self, x): + fea = self.lrelu(self.conv0_0(x)) + fea = self.lrelu(self.bn0_1(self.conv0_1(fea))) + + fea = self.lrelu(self.bn1_0(self.conv1_0(fea))) + fea = self.lrelu(self.bn1_1(self.conv1_1(fea))) + + fea = self.lrelu(self.bn2_0(self.conv2_0(fea))) + fea = self.lrelu(self.bn2_1(self.conv2_1(fea))) + + fea = self.lrelu(self.bn3_0(self.conv3_0(fea))) + fea = self.lrelu(self.bn3_1(self.conv3_1(fea))) + + fea = self.lrelu(self.bn4_0(self.conv4_0(fea))) + fea = self.lrelu(self.bn4_1(self.conv4_1(fea))) + + fea = self.lrelu(self.bn5_0(self.conv5_0(fea))) + fea = self.lrelu(self.bn5_1(self.conv5_1(fea))) + + fea = self.lrelu(self.bn6_0(self.conv6_0(fea))) + fea = self.lrelu(self.bn6_1(self.conv6_1(fea))) + + fea = fea.view(fea.size(0), -1) + fea = self.lrelu(self.linear1(fea)) + out = self.linear2(fea) + return out + + +class VGGFeatureExtractor(nn.Module): + def __init__(self, feature_layer=34, use_bn=False, use_input_norm=True): + super(VGGFeatureExtractor, self).__init__() + self.use_input_norm = use_input_norm + if use_bn: + model = torchvision.models.vgg19_bn(pretrained=True) + else: + model = torchvision.models.vgg19(pretrained=True) + if self.use_input_norm: + mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1) + # [0.485 - 1, 0.456 - 1, 0.406 - 1] if input in range [-1, 1] + std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1) + # [0.229 * 2, 0.224 * 2, 0.225 * 2] if input in range [-1, 1] + self.register_buffer('mean', mean) + self.register_buffer('std', std) + self.features = nn.Sequential( + *list(model.features.children())[:(feature_layer + 1)]) + # No need to BP to variable + for k, v in self.features.named_parameters(): + v.requires_grad = False + + def forward(self, x): + # Assume input range is [0, 1] + if self.use_input_norm: + dev = x.device + x = (x - self.mean.to(dev)) / self.std.to(dev) + output = self.features(x) + return output diff --git a/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/loss.py b/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/loss.py new file mode 100644 index 0000000..d0f9ed3 --- /dev/null +++ b/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/loss.py @@ -0,0 +1,79 @@ +# Copyright (c) 2017-2020 Apache 2.0. 
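+# Charbonnier, GAN (vanilla/lsgan/ragan/wgan-gp) and gradient-penalty loss modules.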
+# Author: Xiaozhong Ji +# Update: 2020 - 5 - 28 +import torch +import torch.nn as nn + + +class CharbonnierLoss(nn.Module): + """Charbonnier Loss (L1)""" + + def __init__(self, eps=1e-6): + super(CharbonnierLoss, self).__init__() + self.eps = eps + + def forward(self, x, y): + diff = x - y + loss = torch.sum(torch.sqrt(diff * diff + self.eps)) + return loss + + +# Define GAN loss: [vanilla | lsgan | wgan-gp] +class GANLoss(nn.Module): + def __init__(self, gan_type, real_label_val=1.0, fake_label_val=0.0): + super(GANLoss, self).__init__() + self.gan_type = gan_type.lower() + self.real_label_val = real_label_val + self.fake_label_val = fake_label_val + + if self.gan_type == 'gan' or self.gan_type == 'ragan': + self.loss = nn.BCEWithLogitsLoss() + elif self.gan_type == 'lsgan': + self.loss = nn.MSELoss() + elif self.gan_type == 'wgan-gp': + + def wgan_loss(input, target): + # target is boolean + return -1 * input.mean() if target else input.mean() + + self.loss = wgan_loss + else: + raise NotImplementedError( + 'GAN type [{:s}] is not found'.format(self.gan_type)) + + def get_target_label(self, input, target_is_real): + if self.gan_type == 'wgan-gp': + return target_is_real + if target_is_real: + return torch.empty_like(input).fill_(self.real_label_val) + else: + return torch.empty_like(input).fill_(self.fake_label_val) + + def forward(self, input, target_is_real): + target_label = self.get_target_label(input, target_is_real) + loss = self.loss(input, target_label) + return loss + + +class GradientPenaltyLoss(nn.Module): + def __init__(self, device=torch.device('cpu')): + super(GradientPenaltyLoss, self).__init__() + self.register_buffer('grad_outputs', torch.Tensor()) + self.grad_outputs = self.grad_outputs.to(device) + + def get_grad_outputs(self, input): + if self.grad_outputs.size() != input.size(): + self.grad_outputs.resize_(input.size()).fill_(1.0) + return self.grad_outputs + + def forward(self, interp, interp_crit): + grad_outputs = self.get_grad_outputs(interp_crit) + grad_interp = torch.autograd.grad(outputs=interp_crit, inputs=interp, + grad_outputs=grad_outputs, + create_graph=True, + retain_graph=True, only_inputs=True)[0] + grad_interp = grad_interp.view(grad_interp.size(0), -1) + grad_interp_norm = grad_interp.norm(2, dim=1) + + loss = ((grad_interp_norm - 1) ** 2).mean() + return loss diff --git a/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/network.py b/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/network.py new file mode 100644 index 0000000..2e74f33 --- /dev/null +++ b/VSR/Backend/Torch/Models/Contrib/ntire20/xiaozhong/ops/network.py @@ -0,0 +1,92 @@ +# Copyright (c) 2017-2020 Apache 2.0. 
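+# ESRGAN-style RRDBNet generator: residual-in-residual dense blocks followed by two nearest-neighbor x2 upsampling stages.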
+# Author: Xiaozhong Ji +# Update: 2020 - 5 - 28 + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def initialize_weights(net_l, scale=1): + if not isinstance(net_l, list): + net_l = [net_l] + for net in net_l: + for m in net.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + m.weight.data *= scale # for residual block + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + m.weight.data *= scale + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias.data, 0.0) + + +class ResidualDenseBlock_5C(nn.Module): + def __init__(self, nf=64, gc=32, bias=True): + super(ResidualDenseBlock_5C, self).__init__() + # gc: growth channel, i.e. intermediate channels + self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias) + self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias) + self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias) + self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias) + self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias) + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + # initialization + initialize_weights( + [self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1) + + def forward(self, x): + x1 = self.lrelu(self.conv1(x)) + x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1))) + x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1))) + x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1))) + x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1)) + return x5 * 0.2 + x + + +class RRDB(nn.Module): + '''Residual in Residual Dense Block''' + + def __init__(self, nf, gc=32): + super(RRDB, self).__init__() + self.RDB1 = ResidualDenseBlock_5C(nf, gc) + self.RDB2 = ResidualDenseBlock_5C(nf, gc) + self.RDB3 = ResidualDenseBlock_5C(nf, gc) + + def forward(self, x): + out = self.RDB1(x) + out = self.RDB2(out) + out = self.RDB3(out) + return out * 0.2 + x + + +class RRDBNet(nn.Module): + def __init__(self, in_nc, out_nc, nf, nb, gc=32): + super(RRDBNet, self).__init__() + self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True) + self.RRDB_trunk = nn.Sequential(*[RRDB(nf=nf, gc=gc) for _ in range(nb)]) + self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + #### upsampling + self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True) + self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True) + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + def forward(self, x): + fea = self.conv_first(x) + trunk = self.trunk_conv(self.RRDB_trunk(fea)) + fea = fea + trunk + fea = self.lrelu( + self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest'))) + fea = self.lrelu( + self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest'))) + out = self.conv_last(self.lrelu(self.HRconv(fea))) + return out diff --git a/VSR/Backend/Torch/Models/Crdn.py b/VSR/Backend/Torch/Models/Crdn.py index b40ea17..cff32c8 100644 --- a/VSR/Backend/Torch/Models/Crdn.py +++ b/VSR/Backend/Torch/Models/Crdn.py @@ -9,7 +9,7 @@ from VSR.Util.Utility import to_list from . 
import Model -from .Arch import CascadeRdn +from .Ops.Blocks import CascadeRdn from ..Framework.Summary import get_writer from ..Util import Metrics @@ -34,21 +34,21 @@ def __init__(self, blocks=(4, 4), **kwargs): self.blocks = to_list(blocks, 2) self.entry = nn.Sequential( - nn.Conv2d(3, 32, 7, 1, 3), - nn.Conv2d(32, 32, 5, 1, 2)) + nn.Conv2d(3, 32, 7, 1, 3), + nn.Conv2d(32, 32, 5, 1, 2)) self.exit = nn.Sequential( - nn.Conv2d(32, 32, 3, 1, 1), - nn.Conv2d(32, 3, 3, 1, 1)) + nn.Conv2d(32, 32, 3, 1, 1), + nn.Conv2d(32, 3, 3, 1, 1)) self.down1 = nn.Conv2d(32, 64, 3, 2, 1) self.down2 = nn.Conv2d(64, 128, 3, 2, 1) self.up1 = Upsample([128, 64]) self.up2 = Upsample([64, 32]) - self.cb1 = CascadeRdn(32, 3, True) - self.cb2 = CascadeRdn(64, 3, True) - self.cb3 = CascadeRdn(128, 3, True) - self.cb4 = CascadeRdn(128, 3, True) - self.cb5 = CascadeRdn(64, 3, True) - self.cb6 = CascadeRdn(32, 3, True) + self.cb1 = CascadeRdn(32, 32, 3, True) + self.cb2 = CascadeRdn(64, 64, 3, True) + self.cb3 = CascadeRdn(128, 128, 3, True) + self.cb4 = CascadeRdn(128, 128, 3, True) + self.cb5 = CascadeRdn(64, 64, 3, True) + self.cb6 = CascadeRdn(32, 32, 3, True) def forward(self, inputs): entry = self.entry(inputs) diff --git a/VSR/Backend/Torch/Models/Dbpn.py b/VSR/Backend/Torch/Models/Dbpn.py index cc7b963..adfdf72 100644 --- a/VSR/Backend/Torch/Models/Dbpn.py +++ b/VSR/Backend/Torch/Models/Dbpn.py @@ -3,38 +3,150 @@ # Email: wenyi.tang@intel.com # Update Date: 2019 - 3 - 15 +import logging + import torch +import torch.nn as nn import torch.nn.functional as F from .Model import SuperResolution -from .dbpn import dbpn, dbpn_v1, dbpns +from .Ops.Blocks import EasyConv2d from ..Util import Metrics +_logger = logging.getLogger("VSR.DBPN") +_logger.info("LICENSE: DBPN is implemented by Haris. " + "@alterzero https://github.com/alterzero/DBPN-Pytorch") + -class DBPNMaker(torch.nn.Module): - def __init__(self, mode='dbpn', **kwargs): - super(DBPNMaker, self).__init__() - _allowed_mode = ('dbpn', 'dbpnll', 'dbpns') - mode = mode.lower() - assert mode in _allowed_mode, "mode must in ('DBPN', 'DBPNLL', 'DBPNS)." 
- if mode == 'dbpn': - self.module = dbpn.Net(**kwargs) - elif mode == 'dbpnll': - self.module = dbpn_v1.Net(**kwargs) - elif mode == 'dbpns': - self.module = dbpns.Net(**kwargs) - else: - raise NotImplemented +class UpBlock(torch.nn.Module): + def __init__(self, num_filter, kernel_size=8, stride=4, activation='prelu'): + super(UpBlock, self).__init__() + self.up_conv1 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation, transposed=True) + self.up_conv2 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation) + self.up_conv3 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation, transposed=True) def forward(self, x): - return self.module(x) + h0 = self.up_conv1(x) + l0 = self.up_conv2(h0) + h1 = self.up_conv3(l0 - x) + return h1 + h0 -class DBPN(SuperResolution): +class DownBlock(torch.nn.Module): + def __init__(self, num_filter, kernel_size=8, stride=4, activation='prelu'): + super(DownBlock, self).__init__() + self.down_conv1 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation) + self.down_conv2 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation, transposed=True) + self.down_conv3 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation) + + def forward(self, x): + l0 = self.down_conv1(x) + h0 = self.down_conv2(l0) + l1 = self.down_conv3(h0 - x) + return l1 + l0 + - def __init__(self, scale, mode='dbpn', **kwargs): - super(DBPN, self).__init__(scale, 3) - self.body = DBPNMaker(mode, scale_factor=scale, **kwargs) +class D_UpBlock(torch.nn.Module): + def __init__(self, num_filter, kernel_size=8, stride=4, num_stages=1, + activation='prelu'): + super(D_UpBlock, self).__init__() + self.conv = EasyConv2d(num_filter * num_stages, num_filter, 1, + activation=activation) + self.up_conv1 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation, transposed=True) + self.up_conv2 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation) + self.up_conv3 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation, transposed=True) + + def forward(self, x): + x = self.conv(x) + h0 = self.up_conv1(x) + l0 = self.up_conv2(h0) + h1 = self.up_conv3(l0 - x) + return h1 + h0 + + +class D_DownBlock(torch.nn.Module): + def __init__(self, num_filter, kernel_size=8, stride=4, num_stages=1, + activation='prelu'): + super(D_DownBlock, self).__init__() + self.conv = EasyConv2d(num_filter * num_stages, num_filter, 1, + activation=activation) + self.down_conv1 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation) + self.down_conv2 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation, transposed=True) + self.down_conv3 = EasyConv2d(num_filter, num_filter, kernel_size, stride, + activation=activation) + + def forward(self, x): + x = self.conv(x) + l0 = self.down_conv1(x) + h0 = self.down_conv2(l0) + l1 = self.down_conv3(h0 - x) + return l1 + l0 + + +class Dbpn(nn.Module): + def __init__(self, channels, scale, base_filter=64, feat=256, num_stages=7): + super(Dbpn, self).__init__() + kernel, stride = self.get_kernel_stride(scale) + + # Initial Feature Extraction + self.feat0 = EasyConv2d(channels, feat, 3, activation='prelu') + self.feat1 = EasyConv2d(feat, base_filter, 1, activation='prelu') + # Back-projection stages + self.up1 = UpBlock(base_filter, kernel, stride) + self.down1 = DownBlock(base_filter, kernel, stride) + 
self.up2 = UpBlock(base_filter, kernel, stride) + for i in range(2, num_stages): + self.__setattr__(f'down{i}', D_DownBlock(base_filter, kernel, stride, i)) + self.__setattr__(f'up{i + 1}', D_UpBlock(base_filter, kernel, stride, i)) + self.num_stages = num_stages + # Reconstruction + self.output_conv = EasyConv2d(num_stages * base_filter, channels, 3) + + def forward(self, x): + x = self.feat0(x) + x = self.feat1(x) + + h1 = self.up1(x) + l1 = self.down1(h1) + h2 = self.up2(l1) + + h = h2 + concat_h = h1 + concat_l = l1 + for i in range(2, self.num_stages): + concat_h = torch.cat((h, concat_h), 1) + l = self.__getattr__(f'down{i}')(concat_h) + concat_l = torch.cat((l, concat_l), 1) + h = self.__getattr__(f'up{i + 1}')(concat_l) + concat_h = torch.cat((h, concat_h), 1) + x = self.output_conv(concat_h) + return x + + @staticmethod + def get_kernel_stride(scale): + if scale == 2: + return 6, 2 + elif scale == 4: + return 8, 4 + elif scale == 8: + return 12, 8 + + +class DBPN(SuperResolution): + def __init__(self, channel, scale, **kwargs): + super(DBPN, self).__init__(scale, channel) + self.body = Dbpn(channel, scale, **kwargs) self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) def train(self, inputs, labels, learning_rate=None): diff --git a/VSR/Backend/Torch/Models/Drn.py b/VSR/Backend/Torch/Models/Drn.py index 83009a6..1c0487e 100644 --- a/VSR/Backend/Torch/Models/Drn.py +++ b/VSR/Backend/Torch/Models/Drn.py @@ -7,11 +7,12 @@ import torch.nn as nn import torch.nn.functional as F -from .Arch import Upsample, EasyConv2d +from VSR.Backend.Torch.Models.Ops.Loss import total_variance from .Model import SuperResolution +from .Ops.Blocks import EasyConv2d +from .Ops.Scale import Upsample from ..Framework.Summary import get_writer -from ..Util import Metrics, Utility -from .Loss import total_variance +from ..Util import Metrics class NoiseExtractor(nn.Module): diff --git a/VSR/Backend/Torch/Models/Edsr.py b/VSR/Backend/Torch/Models/Edsr.py index ab80416..b6390f6 100644 --- a/VSR/Backend/Torch/Models/Edsr.py +++ b/VSR/Backend/Torch/Models/Edsr.py @@ -3,23 +3,99 @@ # Email: wenyi.tang@intel.com # Update Date: 2019 - 3 - 15 +import logging +import random + import torch +import torch.nn as nn import torch.nn.functional as F from .Model import SuperResolution -from .edsr import edsr, mdsr +from .Ops.Blocks import EasyConv2d, MeanShift, RB +from .Ops.Scale import MultiscaleUpsample, Upsample from ..Util import Metrics -from VSR.Util.Config import Config +_logger = logging.getLogger("VSR.EDSR") +_logger.info("LICENSE: EDSR is implemented by Bee Lim. 
" + "@thstkdgus35 https://github.com/thstkdgus35/EDSR-PyTorch") -class EDSR(SuperResolution): +url = { + 'r16f64x2': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_baseline_x2-1bc95232.pt', + 'r16f64x3': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_baseline_x3-abf2a44e.pt', + 'r16f64x4': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_baseline_x4-6b446fab.pt', + 'r32f256x2': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_x2-0edfb8a3.pt', + 'r32f256x3': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_x3-ea3ef2c6.pt', + 'r32f256x4': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_x4-4f62e9ef.pt', + 'r16f64': 'https://cv.snu.ac.kr/research/EDSR/models/mdsr_baseline-a00cab12.pt', + 'r80f64': 'https://cv.snu.ac.kr/research/EDSR/models/mdsr-4a78bedf.pt' +} + + +class Edsr(nn.Module): + def __init__(self, scale, channel, n_resblocks, n_feats, rgb_range): + super(Edsr, self).__init__() + self.sub_mean = MeanShift((0.4488, 0.4371, 0.4040), True, rgb_range) + self.add_mean = MeanShift((0.4488, 0.4371, 0.4040), False, rgb_range) + # define head module + m_head = [EasyConv2d(channel, n_feats, 3)] + # define body module + m_body = [RB(n_feats, n_feats, 3, activation='relu') for _ in + range(n_resblocks)] + m_body.append(EasyConv2d(n_feats, n_feats, 3)) + # define tail module + m_tail = [ + Upsample(n_feats, scale), + EasyConv2d(n_feats, channel, 3)] + self.head = nn.Sequential(*m_head) + self.body = nn.Sequential(*m_body) + self.tail = nn.Sequential(*m_tail) + + def forward(self, x, **kwargs): + x = self.sub_mean(x) + x = self.head(x) + res = self.body(x) + x + x = self.tail(res) + x = self.add_mean(x) + return x + + +class Mdsr(nn.Module): + def __init__(self, scales, channel, n_resblocks, n_feats, rgb_range): + super(Mdsr, self).__init__() + self.sub_mean = MeanShift((0.4488, 0.4371, 0.4040), True, rgb_range) + self.add_mean = MeanShift((0.4488, 0.4371, 0.4040), False, rgb_range) + m_head = [EasyConv2d(channel, n_feats, 3)] + self.pre_process = nn.ModuleList([ + nn.Sequential( + RB(n_feats, kernel_size=5, activation='relu'), + RB(n_feats, kernel_size=5, activation='relu') + ) for _ in scales + ]) + m_body = [RB(n_feats, kernel_size=3, activation='relu') for _ in + range(n_resblocks)] + m_body.append(EasyConv2d(n_feats, n_feats, 3)) + self.upsample = MultiscaleUpsample(n_feats, scales) + m_tail = [EasyConv2d(n_feats, channel, 3)] + self.head = nn.Sequential(*m_head) + self.body = nn.Sequential(*m_body) + self.tail = nn.Sequential(*m_tail) - def __init__(self, scale, **kwargs): - super(EDSR, self).__init__(scale, 3) - args = Config(kwargs) - args.scale = [scale] - self.rgb_range = args.rgb_range - self.edsr = edsr.EDSR(args) + def forward(self, x, scale): + x = self.sub_mean(x) + x = self.head(x) + x = self.pre_process[scale](x) + res = self.body(x) + x + x = self.upsample(res, scale) + x = self.tail(x) + x = self.add_mean(x) + return x + + +class EDSR(SuperResolution): + def __init__(self, scale, channel, rgb_range=255, **kwargs): + super(EDSR, self).__init__(scale, channel) + self.rgb_range = rgb_range + self.edsr = Edsr(scale, channel, rgb_range=rgb_range, **kwargs) self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) def train(self, inputs, labels, learning_rate=None): @@ -43,20 +119,16 @@ def eval(self, inputs, labels=None, **kwargs): class MSDR(SuperResolution): - - def __init__(self, scale, **kwargs): - super(MSDR, self).__init__(scale, 3) - args = Config(kwargs) - args.scale = [2, 3, 4] - self.scales = args.scale - self.rgb_range = args.rgb_range - self.edsr = mdsr.MDSR(args) + 
def __init__(self, scale, channel, rgb_range=255, **kwargs): + super(MSDR, self).__init__(scale, channel) + self.rgb_range = rgb_range + self.scales = (2, 3, 4) + self.mdsr = Mdsr(self.scales, channel, rgb_range=rgb_range, **kwargs) self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) def train(self, inputs, labels, learning_rate=None): - # TODO - self.edsr.set_scale(2) - sr = self.edsr(inputs[0] * self.rgb_range) / self.rgb_range + # draw a random training scale from (2, 3, 4) + scale = random.choice(self.scales) + sr = self.mdsr(inputs[0] * self.rgb_range, scale) / self.rgb_range loss = F.l1_loss(sr, labels[0]) if learning_rate: for param_group in self.opt.param_groups: @@ -68,8 +140,7 @@ def train(self, inputs, labels, learning_rate=None): def eval(self, inputs, labels=None, **kwargs): metrics = {} - self.edsr.set_scale(self.scales.index(self.scale)) - sr = self.edsr(inputs[0] * self.rgb_range) / self.rgb_range + sr = self.mdsr(inputs[0] * self.rgb_range, self.scale) / self.rgb_range sr = sr.cpu().detach() if labels is not None: metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy()) diff --git a/VSR/Backend/Torch/Models/Esrgan.py b/VSR/Backend/Torch/Models/Esrgan.py index d8ea135..c4086be 100644 --- a/VSR/Backend/Torch/Models/Esrgan.py +++ b/VSR/Backend/Torch/Models/Esrgan.py @@ -3,27 +3,58 @@ # Email: wenyi.tang@intel.com # Update Date: 2019 - 3 - 15 +import logging + import numpy as np import torch +import torch.nn as nn import torch.nn.functional as F -from . import Discriminator as disc from .Model import SuperResolution -from .Loss import gan_bce_loss, VggFeatureLoss -from .esrgan import architecture as arch -from ..Util import Metrics +from .Ops.Blocks import Activation, EasyConv2d, Rrdb +from .Ops.Discriminator import DCGAN +from .Ops.Loss import VggFeatureLoss, gan_bce_loss +from .Ops.Scale import Upsample from ..Framework.Summary import get_writer +from ..Util import Metrics + +_logger = logging.getLogger("VSR.ESRGAN") +_logger.info("LICENSE: ESRGAN is implemented by Xintao Wang. 
" + "@xinntao https://github.com/xinntao/ESRGAN") + + +class RRDB_Net(nn.Module): + def __init__(self, channel, scale, nf, nb, gc=32): + super(RRDB_Net, self).__init__() + self.head = EasyConv2d(channel, nf, kernel_size=3) + rb_blocks = [ + Rrdb(nf, gc, 5, 0.2, kernel_size=3, activation=Activation('lrelu', 0.2)) + for _ in range(nb)] + LR_conv = EasyConv2d(nf, nf, kernel_size=3) + upsampler = [Upsample(nf, scale, 'nearest', + activation=Activation('lrelu', 0.2))] + HR_conv0 = EasyConv2d(nf, nf, kernel_size=3, activation='lrelu') + HR_conv1 = EasyConv2d(nf, channel, kernel_size=3) + self.body = nn.Sequential(*rb_blocks, LR_conv) + self.tail = nn.Sequential(*upsampler, HR_conv0, HR_conv1) + + def forward(self, x): + x = self.head(x) + x = self.body(x) + x + x = self.tail(x) + return x class ESRGAN(SuperResolution): - def __init__(self, scale, patch_size=128, weights=(0.01, 1, 5e-3), **kwargs): - super(ESRGAN, self).__init__(scale, 3) + def __init__(self, channel, scale, patch_size=128, weights=(0.01, 1, 5e-3), + nf=64, nb=23, gc=32, **kwargs): + super(ESRGAN, self).__init__(scale, channel) self.use_vgg = weights[1] > 0 self.use_gan = weights[2] > 0 if self.use_gan: - self.dnet = disc.DCGAN(3, np.log2(patch_size // 4) * 2, 'bn') + self.dnet = DCGAN(3, np.log2(patch_size // 4) * 2, 'bn') self.optd = torch.optim.Adam(self.trainable_variables('dnet'), 1e-4) - self.rrdb = arch.RRDB_Net(upscale=scale, **kwargs) + self.rrdb = RRDB_Net(channel, scale, nf, nb, gc) self.optg = torch.optim.Adam(self.trainable_variables('rrdb'), 1e-4) if self.use_vgg: self.vgg = [VggFeatureLoss(['block5_conv4'], True)] diff --git a/VSR/Backend/Torch/Models/Ffdnet.py b/VSR/Backend/Torch/Models/Ffdnet.py index a1a7867..aa30aab 100644 --- a/VSR/Backend/Torch/Models/Ffdnet.py +++ b/VSR/Backend/Torch/Models/Ffdnet.py @@ -7,8 +7,9 @@ import torch.nn.functional as F from torch import nn -from .Arch import EasyConv2d, SpaceToDepth, Upsample from .Model import SuperResolution +from .Ops.Blocks import EasyConv2d +from .Ops.Scale import SpaceToDepth, Upsample from ..Framework.Summary import get_writer from ..Util import Metrics diff --git a/VSR/Backend/Torch/Models/Frvsr.py b/VSR/Backend/Torch/Models/Frvsr.py index 0d4dbbc..92b0a6e 100644 --- a/VSR/Backend/Torch/Models/Frvsr.py +++ b/VSR/Backend/Torch/Models/Frvsr.py @@ -3,32 +3,53 @@ # Email: wenyi.tang@intel.com # Update Date: 4/1/19, 7:13 PM +import logging + import numpy as np import torch import torch.nn.functional as F from torch import nn -from .Arch import SpaceToDepth -from .Loss import total_variance from .Model import SuperResolution -from .frvsr.ops import FNet, SRNet -from .video.motion import STN +from .Ops.Blocks import RB +from .Ops.Loss import total_variance +from .Ops.Motion import Flownet, STN +from .Ops.Scale import SpaceToDepth, Upsample from ..Framework.Summary import get_writer from ..Util import Metrics from ..Util.Utility import pad_if_divide, upsample +_logger = logging.getLogger("VSR.FRVSR") +_logger.info("LICENSE: FRVSR is proposed by Sajjadi, et. al. " + "implemented by LoSeall. 
" + "@loseall https://github.com/loseall/VideoSuperResolution") + + +class SRNet(nn.Module): + def __init__(self, channel, scale, n_rb=10): + super(SRNet, self).__init__() + rbs = [RB(64, activation='relu') for _ in range(n_rb)] + entry = [nn.Conv2d(channel * (scale ** 2 + 1), 64, 3, 1, 1), nn.ReLU(True)] + up = Upsample(64, scale, method='ps') + out = nn.Conv2d(64, channel, 3, 1, 1) + self.body = nn.Sequential(*entry, *rbs, up, out) + + def forward(self, *inputs): + x = torch.cat(inputs, dim=1) + return self.body(x) + class FRNet(nn.Module): def __init__(self, channel, scale, n_rb): super(FRNet, self).__init__() - self.fnet = FNet(channel, gain=32) + self.fnet = Flownet(channel) self.warp = STN(padding_mode='border') self.snet = SRNet(channel, scale, n_rb) self.space_to_depth = SpaceToDepth(scale) self.scale = scale def forward(self, lr, last_lr, last_sr): - flow = self.fnet(lr, last_lr) + flow = self.fnet(lr, last_lr, gain=32) flow2 = self.scale * upsample(flow, self.scale) hw = self.warp(last_sr, flow2[:, 0], flow2[:, 1]) lw = self.warp(last_lr, flow[:, 0], flow[:, 1]) diff --git a/VSR/Backend/Torch/Models/Mldn.py b/VSR/Backend/Torch/Models/Mldn.py index 6f38f06..0eb52fa 100644 --- a/VSR/Backend/Torch/Models/Mldn.py +++ b/VSR/Backend/Torch/Models/Mldn.py @@ -7,8 +7,9 @@ import torch.nn as nn import torch.nn.functional as F -from .Arch import CascadeRdn, Upsample from .Model import SuperResolution +from .Ops.Blocks import CascadeRdn +from .Ops.Scale import Upsample from ..Framework.Summary import get_writer from ..Util import Metrics, Utility diff --git a/VSR/Backend/Torch/Models/Model.py b/VSR/Backend/Torch/Models/Model.py index 38dfb7e..a434e83 100644 --- a/VSR/Backend/Torch/Models/Model.py +++ b/VSR/Backend/Torch/Models/Model.py @@ -3,13 +3,15 @@ # Email: wenyi.tang@intel.com # Update Date: 2019/4/3 下午5:10 -import torch import logging +from collections import OrderedDict + +import torch from ..Framework.Trainer import SRTrainer -class BasicModel: +class BasicModel(object): """Trainable model wrapper for PyTorch nn.Module objects There are 2 built-in attributes: @@ -18,12 +20,11 @@ class BasicModel: - opts: contains a K-V pair of `str: optim.Optimizer`. Will be automatically appended if a derived object assign any attribute with `optim.Optimizer`. """ - - def __init__(self, **kwargs): - self.modules = {} - self.opts = {} - self.name = '' - self._trainer = None + modules = OrderedDict() + opts = OrderedDict() + name = '' + loaded = None + _trainer = None def __setattr__(self, key, value): if key in ('modules', 'opts',): @@ -36,7 +37,7 @@ def __setattr__(self, key, value): else: # TODO: why assign twice?? 
raise NotImplementedError - else: + elif len(list(value.parameters())): self.modules[key] = value self.name += f'[{key}]' if isinstance(value, torch.optim.Optimizer): @@ -120,15 +121,39 @@ def get_executor(self, root): def load(self, pth, map_location=None): for key, model in self.modules.items(): if not isinstance(pth, dict): - model.load_state_dict(torch.load(str(pth), map_location=map_location)) + self.sequential_load(model, str(pth), map_location) break - model.load_state_dict( - torch.load(str(pth[key]), map_location=map_location)) + self.sequential_load(model, str(pth[key]), map_location) + self.loaded = True for key, opt in self.opts.items(): if isinstance(pth, dict): opt.load_state_dict( torch.load(str(pth[key]), map_location=map_location)) + + @staticmethod + def sequential_load(module, pth, map_location=None): + state_dict = torch.load(pth, map_location=map_location) + p = module.state_dict() + while len(state_dict) and len(p): + saved_name, saved_data = state_dict.popitem() + name, buffer = p.popitem() + if saved_name != name: + logging.getLogger('VSR').warning( + f"unmatched name: expected {name}, got {saved_name}.") + if buffer.shape == saved_data.shape: + buffer.data.copy_(saved_data) + else: + logging.getLogger('VSR').error( + f"Checkpoint shape mismatch for {name}, " + f"expected {buffer.shape}, but got {saved_data.shape}") + raise ValueError + while len(state_dict): + saved_name, _ = state_dict.popitem() + logging.getLogger('VSR').warning(f"Unexpected keys: {saved_name}") + while len(p): + name, _ = p.popitem() + logging.getLogger('VSR').warning(f"Missing keys: {name}") + class SuperResolution(BasicModel): """A default model for (video) super-resolution""" diff --git a/VSR/Backend/Torch/Models/Msrn.py b/VSR/Backend/Torch/Models/Msrn.py index 32aa33a..ecce3b7 100644 --- a/VSR/Backend/Torch/Models/Msrn.py +++ b/VSR/Backend/Torch/Models/Msrn.py @@ -3,23 +3,92 @@ # Email: wenyi.tang@intel.com # Update Date: 2019 - 3 - 15 +import logging + import torch +import torch.nn as nn +import torch.nn.functional as F from .Model import SuperResolution -from .msrn import msrn +from .Ops.Blocks import EasyConv2d, MeanShift +from .Ops.Scale import Upsample from ..Util import Metrics -from VSR.Util.Config import Config +_logger = logging.getLogger("VSR.MSRN") +_logger.info("LICENSE: MSRN is implemented by Juncheng Li. 
" + "@MIVRC https://github.com/MIVRC/MSRN-PyTorch") -class MSRN(SuperResolution): - def __init__(self, scale, **kwargs): +class MSRB(nn.Module): + def __init__(self, n_feats=64): + super(MSRB, self).__init__() + self.conv_3_1 = EasyConv2d(n_feats, n_feats, 3) + self.conv_3_2 = EasyConv2d(n_feats * 2, n_feats * 2, 3) + self.conv_5_1 = EasyConv2d(n_feats, n_feats, 5) + self.conv_5_2 = EasyConv2d(n_feats * 2, n_feats * 2, 5) + self.confusion = nn.Conv2d(n_feats * 4, n_feats, 1, padding=0, stride=1) + + def forward(self, x): + input_1 = x + output_3_1 = F.relu(self.conv_3_1(input_1)) + output_5_1 = F.relu(self.conv_5_1(input_1)) + input_2 = torch.cat([output_3_1, output_5_1], 1) + output_3_2 = F.relu(self.conv_3_2(input_2)) + output_5_2 = F.relu(self.conv_5_2(input_2)) + input_3 = torch.cat([output_3_2, output_5_2], 1) + output = self.confusion(input_3) + output += x + return output + + +class Msrn(nn.Module): + def __init__(self, channel, scale, n_feats, n_blocks, rgb_range): + super(Msrn, self).__init__() + self.n_blocks = n_blocks + # RGB mean for DIV2K + rgb_mean = (0.4488, 0.4371, 0.4040) + self.sub_mean = MeanShift(rgb_mean, True, rgb_range) + # define head module + modules_head = [EasyConv2d(channel, n_feats, 3)] + # define body module + modules_body = nn.ModuleList() + for i in range(n_blocks): + modules_body.append(MSRB(n_feats=n_feats)) + # define tail module + modules_tail = [ + EasyConv2d(n_feats * (self.n_blocks + 1), n_feats, 1), + EasyConv2d(n_feats, n_feats, 3), + Upsample(n_feats, scale), + EasyConv2d(n_feats, channel, 3)] + + self.add_mean = MeanShift(rgb_mean, False, rgb_range) + self.head = nn.Sequential(*modules_head) + self.body = nn.Sequential(*modules_body) + self.tail = nn.Sequential(*modules_tail) + + def forward(self, x): + x = self.sub_mean(x) + x = self.head(x) + res = x + + MSRB_out = [] + for i in range(self.n_blocks): + x = self.body[i](x) + MSRB_out.append(x) + MSRB_out.append(res) + + res = torch.cat(MSRB_out, 1) + x = self.tail(res) + x = self.add_mean(x) + return x + + +class MSRN(SuperResolution): + def __init__(self, scale, channel, n_feats=64, n_blocks=8, rgb_range=255, + **kwargs): super(MSRN, self).__init__(scale, 3) - args = Config(kwargs) - args.scale = [scale] - self.rgb_range = args.rgb_range - self.msrn = msrn.MSRN(args) + self.rgb_range = rgb_range + self.msrn = Msrn(channel, scale, n_feats, n_blocks, rgb_range) self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) def train(self, inputs, labels, learning_rate=None): diff --git a/VSR/Backend/Torch/Models/NTIRE19.py b/VSR/Backend/Torch/Models/NTIRE19.py index 9c15c5e..d3e223b 100644 --- a/VSR/Backend/Torch/Models/NTIRE19.py +++ b/VSR/Backend/Torch/Models/NTIRE19.py @@ -3,59 +3,21 @@ # Email: wenyi.tang@intel.com # Update Date: 2019/4/16 -import torch -import torch.nn.functional as F - from VSR.Util.Config import Config -from .Model import SuperResolution -from .ntire19 import denoise, edrn, frn, ran2 -from ..Util import Metrics, Utility - - -class L1Optimizer(SuperResolution): - def __init__(self, channel, scale=1): - super(L1Optimizer, self).__init__(scale, channel) - - def fn(self, x): - raise NotImplementedError - - def train(self, inputs, labels, learning_rate=None): - sr = self.fn(inputs[0]) - loss = F.l1_loss(sr, labels[0]) - if learning_rate: - for param_group in self.opt.param_groups: - param_group["lr"] = learning_rate - self.opt.zero_grad() - loss.backward() - self.opt.step() - return {'l1': loss.detach().cpu().numpy()} - - def eval(self, inputs, labels=None, **kwargs): - metrics 
= {} - _lr = inputs[0] - lr = Utility.pad_if_divide(_lr, 32) - a = lr.size(2) - _lr.size(2) - b = lr.size(3) - _lr.size(3) - slice_h = slice(None) if a == 0 else slice(a // 2, -a // 2) - slice_w = slice(None) if b == 0 else slice(b // 2, -b // 2) - sr = self.fn(lr)[..., slice_h, slice_w] - sr = sr.cpu().detach() - if labels is not None: - metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy()) - return [sr.numpy()], metrics +from .Contrib.ntire19 import denoise, edrn, frn, ran2 +from .Optim.SISR import L1Optimizer class EDRN(L1Optimizer): """EDRN is one candidate of NTIRE19 RSR""" def __init__(self, scale, channel, **kwargs): - super(EDRN, self).__init__(channel=channel, scale=scale) args = Config(kwargs) args.scale = [scale] args.n_colors = channel self.rgb_range = args.rgb_range self.edrn = edrn.EDRN(args) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) + super(EDRN, self).__init__(channel=channel, scale=scale, **kwargs) def fn(self, x): return self.edrn(x * self.rgb_range) / self.rgb_range @@ -63,13 +25,12 @@ def fn(self, x): class FRN(L1Optimizer): def __init__(self, scale, channel, **kwargs): - super(FRN, self).__init__(channel=channel, scale=scale) args = Config(kwargs) args.scale = [scale] args.n_colors = channel self.rgb_range = args.rgb_range self.frn = frn.FRN_UPDOWN(args) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) + super(FRN, self).__init__(channel=channel, scale=scale, **kwargs) def fn(self, x): return self.frn(x * self.rgb_range) / self.rgb_range @@ -77,13 +38,12 @@ def fn(self, x): class RAN(L1Optimizer): def __init__(self, scale, channel, **kwargs): - super(RAN, self).__init__(channel=channel, scale=scale) args = Config(kwargs) args.scale = [scale] args.n_colors = channel self.rgb_range = args.rgb_range self.ran = ran2.RAN(args) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) + super(RAN, self).__init__(channel=channel, scale=scale, **kwargs) def fn(self, x): return self.ran(x * self.rgb_range) / self.rgb_range @@ -91,9 +51,8 @@ def fn(self, x): class DIDN(L1Optimizer): def __init__(self, channel, filters, umodule, **kwargs): - super(DIDN, self).__init__(channel=channel) self.didn = denoise.EraserTeam.DIDN(channel, filters, umodule) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) + super(DIDN, self).__init__(channel=channel, **kwargs) def fn(self, x): return self.didn(x) @@ -101,9 +60,8 @@ def fn(self, x): class DHDN(L1Optimizer): def __init__(self, channel, filters, **kwargs): - super(DHDN, self).__init__(channel=channel) self.dhdn = denoise.EraserTeam.DHDN(channel, filters) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) + super(DHDN, self).__init__(channel=channel, **kwargs) def fn(self, x): return self.dhdn(x) @@ -111,9 +69,8 @@ def fn(self, x): class GRDN(L1Optimizer): def __init__(self, channel, filters, grdb, rdb, **kwargs): - super(GRDN, self).__init__(channel=channel) self.grdn = denoise.DGUTeam.GRDN(channel, filters, grdb, rdb) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) + super(GRDN, self).__init__(channel=channel, **kwargs) def fn(self, x): return self.grdn(x) @@ -121,9 +78,8 @@ def fn(self, x): class ResUNet(L1Optimizer): def __init__(self, channel, filters, rb, **kwargs): - super(ResUNet, self).__init__(channel=channel) self.resunet = denoise.HITVPCTeam.ResUNet(channel, filters, rb) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) + super(ResUNet, self).__init__(channel=channel, **kwargs) def fn(self, x): return self.resunet(x) 
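Note on the construction order in the hunks above: each refactored model now assigns its sub-network first and calls super().__init__() last, because the base class (see the BasicModel changes in Model.py above) registers nn.Module attributes as they are assigned and then builds its optimizer from the registered parameters inside __init__. The following minimal, self-contained sketch illustrates that mechanism; TinyBase and TinySR are illustrative names, not classes from this patch or the VSR codebase.

import torch
import torch.nn as nn


class TinyBase:
  def __setattr__(self, key, value):
    if isinstance(value, nn.Module):
      # Register sub-modules into a plain dict; this works even before
      # TinyBase.__init__ has run.
      self.__dict__.setdefault('registry', {})[key] = value
    super().__setattr__(key, value)

  def __init__(self):
    # Collect whatever has been registered so far and build the optimizer.
    params = [p for m in self.__dict__.get('registry', {}).values()
              for p in m.parameters()]
    self.opt = torch.optim.Adam(params, lr=1e-4)


class TinySR(TinyBase):
  def __init__(self):
    self.body = nn.Conv2d(3, 3, 3, padding=1)  # assign layers first
    super(TinySR, self).__init__()  # then let the base build the optimizer


model = TinySR()
print(sum(p.numel() for p in model.opt.param_groups[0]['params']))  # 84

Calling super().__init__() before assigning the layers would hand Adam an empty parameter list, which is why every subclass constructor here defers it to the end.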
diff --git a/VSR/Backend/Torch/Models/NTIRE20.py b/VSR/Backend/Torch/Models/NTIRE20.py index e69de29..e923f60 100644 --- a/VSR/Backend/Torch/Models/NTIRE20.py +++ b/VSR/Backend/Torch/Models/NTIRE20.py @@ -0,0 +1,54 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 5 - 28 + +import torch +import torch.nn.functional as F + +from .Contrib.ntire20.xiaozhong.ops import define_D, define_F, define_G +from .Model import SuperResolution +from ..Util import Metrics + + +class RealSR(SuperResolution): + """ + RealSR, proposed by Xiaozhong Ji. + See (NTIRE report, not full paper): https://arxiv.org/pdf/2005.01996.pdf + """ + + def __init__(self, channel=3, scale=4, nf=64, nb=23, **kwargs): + super(RealSR, self).__init__(channel=channel, scale=scale) + self.weights = [ + kwargs.get('pixel_weight', 1), + kwargs.get('feature_weight', 0), + kwargs.get('gan_weight', 0) + ] + self.realsr_g = define_G(in_nc=channel, out_nc=channel, nf=nf, nb=nb) + self.opt_g = torch.optim.Adam(self.trainable_variables('realsr_g'), 1e-4, + betas=(0.5, 0.999)) + if self.weights[1] > 0: + self.feature_net = define_F() # for feature loss + if self.weights[2] > 0: + self.realsr_d = define_D(in_nc=channel, nf=nf, nlayer=3) + self.opt_d = torch.optim.Adam(self.trainable_variables('realsr_d'), 1e-4, + betas=(0.5, 0.999)) + + def train(self, inputs, labels, learning_rate=None): + sr = self.realsr_g(inputs[0]) + pixel_loss = F.l1_loss(sr, labels[0]) * self.weights[0] + loss = pixel_loss + if learning_rate: + for param_group in self.opt_g.param_groups: + param_group["lr"] = learning_rate + self.opt_g.zero_grad() + loss.backward() + self.opt_g.step() + return {'l1': loss.detach().cpu().numpy()} + + def eval(self, inputs, labels=None, **kwargs): + metrics = {} + sr = self.realsr_g(inputs[0]).cpu().detach() + if labels is not None: + metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy()) + return [sr.numpy()], metrics diff --git a/VSR/Backend/Torch/Models/Ops/Blocks.py b/VSR/Backend/Torch/Models/Ops/Blocks.py new file mode 100644 index 0000000..2a1b970 --- /dev/null +++ b/VSR/Backend/Torch/Models/Ops/Blocks.py @@ -0,0 +1,388 @@ +# Copyright (c) 2017-2020 Wenyi Tang. 
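+# Common building blocks shared by the Torch models: mean shift, conv wrappers, residual/dense blocks, attention, ConvLSTM.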
+# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 15 + +import math + +import torch +import torch.nn as nn +from torch.autograd import Variable +from torch.nn import Parameter +from torch.nn import functional as F +from torch.nn.modules.utils import _pair + +from VSR.Util.Utility import to_list + + +class MeanShift(nn.Conv2d): + def __init__(self, mean_rgb, sub, rgb_range=1.0): + super(MeanShift, self).__init__(3, 3, 1) + sign = -1 if sub else 1 + self.weight.data = torch.eye(3).view(3, 3, 1, 1) + self.bias.data = torch.Tensor(mean_rgb) * sign * rgb_range + # Freeze the mean shift layer + for params in self.parameters(): + params.requires_grad = False + + +class Activation(nn.Module): + def __init__(self, act, *args, **kwargs): + super(Activation, self).__init__() + if act is None: + self.f = lambda t: t + if isinstance(act, str): + self.name = act.lower() + in_place = kwargs.get('in_place', True) + if self.name == 'relu': + self.f = nn.ReLU(in_place) + elif self.name == 'prelu': + self.f = nn.PReLU() + elif self.name in ('lrelu', 'leaky', 'leakyrelu'): + self.f = nn.LeakyReLU(*args, inplace=in_place) + elif self.name == 'tanh': + self.f = nn.Tanh() + elif self.name == 'sigmoid': + self.f = nn.Sigmoid() + elif callable(act): + self.f = act + + def forward(self, x): + return self.f(x) + + +class EasyConv2d(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding='same', dilation=1, groups=1, activation=None, + use_bias=True, use_bn=False, use_sn=False, transposed=False): + super(EasyConv2d, self).__init__() + padding = padding.lower() + assert padding in ('same', 'valid') + if transposed: + assert padding == 'same' + q = kernel_size % 2 # output padding + p = (kernel_size + q - stride) // 2 # padding + net = [nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, + p, q, groups, use_bias, dilation)] + else: + if padding == 'same': + padding_ = (dilation * (kernel_size - 1) - stride + 2) // 2 + else: + padding_ = 0 + net = [nn.Conv2d(in_channels, out_channels, kernel_size, stride, + padding_, dilation, groups, use_bias)] + if use_sn: + net[0] = nn.utils.spectral_norm(net[0]) + if use_bn: + net += [nn.BatchNorm2d(out_channels)] + if activation: + net += [Activation(activation, in_place=True)] + self.body = nn.Sequential(*net) + + def forward(self, x): + return self.body(x) + + def initialize_(self, kernel, bias=None): + """initialize the convolutional weights from external sources + + Args: + kernel: kernel weight. Shape=[OUT, IN, K, K] + bias: bias weight. Shape=[OUT] + """ + + dtype = self.body[0].weight.dtype + device = self.body[0].weight.device + kernel = torch.tensor(kernel, dtype=dtype, device=device, + requires_grad=True) + assert kernel.shape == self.body[0].weight.shape, "Wrong kernel shape!" + if bias is not None: + bias = torch.tensor(bias, dtype=dtype, device=device, requires_grad=True) + assert bias.shape == self.body[0].bias.shape, "Wrong bias shape!" 
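+ # copy the validated values into the underlying convolution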
+ self.body[0].weight.data.copy_(kernel) + if bias is not None: + self.body[0].bias.data.copy_(bias) + + +class RB(nn.Module): + def __init__(self, in_channels, out_channels=None, kernel_size=3, + activation=None, use_bias=True, use_bn=False, use_sn=False, + act_first=None): + super(RB, self).__init__() + if out_channels is None: + out_channels = in_channels + conv1 = nn.Conv2d(in_channels, out_channels, kernel_size, 1, + kernel_size // 2, bias=use_bias) + conv2 = nn.Conv2d(out_channels, out_channels, kernel_size, 1, + kernel_size // 2, bias=use_bias) + if use_sn: + conv1 = nn.utils.spectral_norm(conv1) + conv2 = nn.utils.spectral_norm(conv2) + net = [conv1, Activation(activation, in_place=True), conv2] + if use_bn: + net.insert(1, nn.BatchNorm2d(out_channels)) + if act_first: + net = [nn.BatchNorm2d(in_channels), + Activation(activation, in_place=True)] + net + else: + net.append(nn.BatchNorm2d(out_channels)) + self.body = nn.Sequential(*net) + if in_channels != out_channels: + self.shortcut = nn.Conv2d(in_channels, out_channels, 1) + + def forward(self, x): + out = self.body(x) + if hasattr(self, 'shortcut'): + sc = self.shortcut(x) + return out + sc + return out + x + + +class Rdb(nn.Module): + def __init__(self, channels, filters, depth=3, scaling=1.0, + name='Rdb', **kwargs): + super(Rdb, self).__init__() + self.name = name + self.depth = depth + self.scaling = scaling + for i in range(depth): + conv = EasyConv2d(channels + filters * i, filters, **kwargs) + setattr(self, f'conv_{i}', conv) + # no activation after last layer + try: + kwargs.pop('activation') + except KeyError: + pass + conv = EasyConv2d(channels + filters * (depth - 1), channels, **kwargs) + setattr(self, f'conv_{depth - 1}', conv) + + def forward(self, inputs): + fl = [inputs] + for i in range(self.depth): + conv = getattr(self, f'conv_{i}') + fl.append(conv(torch.cat(fl, dim=1))) + return fl[-1] * self.scaling + inputs + + def extra_repr(self): + return f"{self.name}: depth={self.depth}, scaling={self.scaling}" + + +class Rrdb(nn.Module): + """ + Residual in Residual Dense Block + """ + + def __init__(self, nc, gc=32, depth=5, scaling=1.0, **kwargs): + super(Rrdb, self).__init__() + self.RDB1 = Rdb(nc, gc, depth, scaling, **kwargs) + self.RDB2 = Rdb(nc, gc, depth, scaling, **kwargs) + self.RDB3 = Rdb(nc, gc, depth, scaling, **kwargs) + self.scaling = scaling + + def forward(self, x): + out = self.RDB1(x) + out = self.RDB2(out) + out = self.RDB3(out) + return out.mul(self.scaling) + x + + +class Rcab(nn.Module): + def __init__(self, channels, ratio=16, name='RCAB', **kwargs): + super(Rcab, self).__init__() + self.name = name + self.ratio = ratio + in_c, out_c = to_list(channels, 2) + ks = kwargs.get('kernel_size', 3) + padding = kwargs.get('padding', ks // 2) + group = kwargs.get('group', 1) + bias = kwargs.get('bias', True) + self.c1 = nn.Sequential( + nn.Conv2d(in_c, out_c, ks, 1, padding, 1, group, bias), + nn.ReLU(True)) + self.c2 = nn.Conv2d(out_c, out_c, ks, 1, padding, 1, group, bias) + self.c3 = nn.Sequential( + nn.Conv2d(out_c, out_c // ratio, 1, groups=group, bias=bias), + nn.ReLU(True)) + self.c4 = nn.Sequential( + nn.Conv2d(out_c // ratio, in_c, 1, groups=group, bias=bias), + nn.Sigmoid()) + self.pooling = nn.AdaptiveAvgPool2d(1) + + def forward(self, inputs): + x = self.c1(inputs) + y = self.c2(x) + x = self.pooling(y) + x = self.c3(x) + x = self.c4(x) + y = x * y + return inputs + y + + def extra_repr(self): + return f"{self.name}: ratio={self.ratio}" + + +class CascadeRdn(nn.Module): + def __init__(self, channels, filters, 
depth=3, use_ca=False, + name='CascadeRdn', **kwargs): + super(CascadeRdn, self).__init__() + self.name = name + self.depth = to_list(depth, 2) + self.ca = use_ca + for i in range(self.depth[0]): + setattr(self, f'conv11_{i}', + nn.Conv2d(channels + filters * (i + 1), filters, 1)) + setattr(self, f'rdn_{i}', Rdb(channels, filters, self.depth[1], **kwargs)) + if use_ca: + setattr(self, f'rcab_{i}', Rcab(channels)) + + def forward(self, inputs): + fl = [inputs] + x = inputs + for i in range(self.depth[0]): + rdn = getattr(self, f'rdn_{i}') + x = rdn(x) + if self.ca: + rcab = getattr(self, f'rcab_{i}') + x = rcab(x) + fl.append(x) + c11 = getattr(self, f'conv11_{i}') + x = c11(torch.cat(fl, dim=1)) + + return x + + def extra_repr(self): + return f"{self.name}: depth={self.depth}, ca={self.ca}" + + +class CBAM(nn.Module): + """Convolutional Block Attention Module (ECCV 18) + - CA: channel attention module + - SA: spatial attention module + + Args: + channels: input channel of tensors + channel_reduction: reduction ratio in `CA` + spatial_first: put SA ahead of CA (default: CA->SA) + """ + + class CA(nn.Module): + def __init__(self, channels, ratio=16): + super(CBAM.CA, self).__init__() + self.max_pool = nn.AdaptiveMaxPool2d(1) + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.mlp = nn.Sequential( + nn.Conv2d(channels, channels // ratio, 1), + nn.ReLU(), + nn.Conv2d(channels // ratio, channels, 1)) + + def forward(self, x): + maxpool = self.max_pool(x) + avgpool = self.avg_pool(x) + att = F.sigmoid(self.mlp(maxpool) + self.mlp(avgpool)) + return att * x + + class SA(nn.Module): + def __init__(self, kernel_size=7): + super(CBAM.SA, self).__init__() + self.conv = nn.Conv2d(2, 1, kernel_size, 1, kernel_size // 2) + + def forward(self, x): + max_c_pool = x.max(dim=1, keepdim=True) + avg_c_pool = x.mean(dim=1, keepdim=True) + y = torch.cat([max_c_pool, avg_c_pool], dim=1) + att = F.sigmoid(self.conv(y)) + return att * x + + def __init__(self, channels, channel_reduction=16, spatial_first=None): + super(CBAM, self).__init__() + self.channel_attention = CBAM.CA(channels, ratio=channel_reduction) + self.spatial_attention = CBAM.SA(7) + self.spatial_first = spatial_first + + def forward(self, inputs): + if self.spatial_first: + x = self.spatial_attention(inputs) + return self.channel_attention(x) + else: + x = self.channel_attention(inputs) + return self.spatial_attention(x) + + +class Conv2dLSTMCell(nn.Module): + """ConvLSTM cell. 
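+
+  Gates are computed with convolutions over the input, the hidden state and
+  (peephole-style) the cell state, rather than fully-connected products.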
+ Copied from https://gist.github.com/Kaixhin/57901e91e5c5a8bac3eb0cbbdd3aba81 + Special thanks to @Kaixhin + """ + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True): + super(Conv2dLSTMCell, self).__init__() + if in_channels % groups != 0: + raise ValueError('in_channels must be divisible by groups') + if out_channels % groups != 0: + raise ValueError('out_channels must be divisible by groups') + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.padding_h = tuple( + k // 2 for k, s, p, d in zip(kernel_size, stride, padding, dilation)) + self.dilation = dilation + self.groups = groups + self.weight_ih = Parameter( + torch.Tensor(4 * out_channels, in_channels // groups, *kernel_size)) + self.weight_hh = Parameter( + torch.Tensor(4 * out_channels, out_channels // groups, *kernel_size)) + self.weight_ch = Parameter( + torch.Tensor(3 * out_channels, out_channels // groups, *kernel_size)) + if bias: + self.bias_ih = Parameter(torch.Tensor(4 * out_channels)) + self.bias_hh = Parameter(torch.Tensor(4 * out_channels)) + self.bias_ch = Parameter(torch.Tensor(3 * out_channels)) + else: + self.register_parameter('bias_ih', None) + self.register_parameter('bias_hh', None) + self.register_parameter('bias_ch', None) + self.register_buffer('wc_blank', torch.zeros(out_channels)) + self.reset_parameters() + + def reset_parameters(self): + n = 4 * self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight_ih.data.uniform_(-stdv, stdv) + self.weight_hh.data.uniform_(-stdv, stdv) + self.weight_ch.data.uniform_(-stdv, stdv) + if self.bias_ih is not None: + self.bias_ih.data.uniform_(-stdv, stdv) + self.bias_hh.data.uniform_(-stdv, stdv) + self.bias_ch.data.uniform_(-stdv, stdv) + + def forward(self, input, hx): + h_0, c_0 = hx + + wx = F.conv2d(input, self.weight_ih, self.bias_ih, self.stride, + self.padding, self.dilation, self.groups) + wh = F.conv2d(h_0, self.weight_hh, self.bias_hh, self.stride, + self.padding_h, self.dilation, self.groups) + # Cell uses a Hadamard product instead of a convolution? + wc = F.conv2d(c_0, self.weight_ch, self.bias_ch, self.stride, + self.padding_h, self.dilation, self.groups) + v = Variable(self.wc_blank).reshape((1, -1, 1, 1)) + wxhc = wx + wh + torch.cat((wc[:, :2 * self.out_channels], + v.expand(wc.size(0), wc.size(1) // 3, + wc.size(2), wc.size(3)), + wc[:, 2 * self.out_channels:]), 1) + + i = torch.sigmoid(wxhc[:, :self.out_channels]) + f = torch.sigmoid(wxhc[:, self.out_channels:2 * self.out_channels]) + g = torch.tanh(wxhc[:, 2 * self.out_channels:3 * self.out_channels]) + o = torch.sigmoid(wxhc[:, 3 * self.out_channels:]) + + c_1 = f * c_0 + i * g + h_1 = o * torch.tanh(c_1) + return h_1, (h_1, c_1) diff --git a/VSR/Backend/Torch/Models/Discriminator.py b/VSR/Backend/Torch/Models/Ops/Discriminator.py similarity index 96% rename from VSR/Backend/Torch/Models/Discriminator.py rename to VSR/Backend/Torch/Models/Ops/Discriminator.py index 5624fe0..2daea31 100644 --- a/VSR/Backend/Torch/Models/Discriminator.py +++ b/VSR/Backend/Torch/Models/Ops/Discriminator.py @@ -1,10 +1,11 @@ -# Copyright (c): Wenyi Tang 2017-2019. +# Copyright (c) 2017-2020 Wenyi Tang. 
# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/25 下午4:08 +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 15 from torch import nn -from .Arch import EasyConv2d, RB, Activation + +from .Blocks import Activation, EasyConv2d, RB def _pull_conv_args(**kwargs): diff --git a/VSR/Backend/Torch/Models/Ops/Distortion.py b/VSR/Backend/Torch/Models/Ops/Distortion.py new file mode 100644 index 0000000..6d731e5 --- /dev/null +++ b/VSR/Backend/Torch/Models/Ops/Distortion.py @@ -0,0 +1,140 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 15 + +import random + +import torch +import torch.nn as nn +import torchvision.transforms.functional as F +from torchvision.transforms import Compose, Lambda, ToPILImage, ToTensor + +from VSR.Util.Math import gaussian_kernel +from ...Util.Utility import gaussian_noise, imfilter, poisson_noise + + +class Distortion(nn.Module): + """Randomly change the brightness, contrast and saturation of an image. + + Args: + brightness (float or tuple of float (min, max)): How much to jitter brightness. + brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness] + or the given [min, max]. Should be non negative numbers. + contrast (float or tuple of float (min, max)): How much to jitter contrast. + contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast] + or the given [min, max]. Should be non negative numbers. + saturation (float or tuple of float (min, max)): How much to jitter saturation. + saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation] + or the given [min, max]. Should be non negative numbers. + hue (float or tuple of float (min, max)): How much to jitter hue. + hue_factor is chosen uniformly from [-hue, hue] or the given [min, max]. + Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. + """ + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, + gaussian_noise_std=0, poisson_noise_std=0, gaussian_blur_std=0): + super(Distortion, self).__init__() + self.brightness = self._check_input(brightness, 'brightness') + self.contrast = self._check_input(contrast, 'contrast') + self.saturation = self._check_input(saturation, 'saturation') + self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5), + clip_first_on_zero=False) + self.awgn = self._check_input(gaussian_noise_std, 'awgn', center=0, + bound=(0, 0.75), clip_first_on_zero=True) + self.poisson = None + self.blur = self._check_input(gaussian_blur_std, 'blur', center=0) + self.blur_padding = nn.ReflectionPad2d(7) + + def _check_input(self, value, name, center=1, bound=(0, float('inf')), + clip_first_on_zero=True): + if isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError("{} values should be between {}".format(name, bound)) + else: + if value < 0: + raise ValueError( + "If {} is a single number, it must be non negative.".format(name)) + value = [center - value, center + value] + if clip_first_on_zero: + value[0] = max(value[0], 0) + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def get_params(brightness, contrast, saturation, hue, awgn, poisson, blur): + """Get a randomized transform to be applied on image. + + Arguments are same as that of __init__. 
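+    Note: `awgn`, `poisson` and `blur` are accepted here for symmetry, but
+    the noise and blur factors are drawn in `forward` instead; `poisson` is
+    currently disabled (`self.poisson = None` in `__init__`).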
+ + Returns: + Transform which randomly adjusts brightness, contrast and + saturation in a random order. + """ + transforms = [] + + brightness_factor = 0 + if brightness is not None: + brightness_factor = random.uniform(brightness[0], brightness[1]) + transforms.append( + Lambda(lambda img: F.adjust_brightness(img, brightness_factor))) + contrast_factor = 0 + if contrast is not None: + contrast_factor = random.uniform(contrast[0], contrast[1]) + transforms.append( + Lambda(lambda img: F.adjust_contrast(img, contrast_factor))) + saturation_factor = 0 + if saturation is not None: + saturation_factor = random.uniform(saturation[0], saturation[1]) + transforms.append( + Lambda(lambda img: F.adjust_saturation(img, saturation_factor))) + hue_factor = 0 + if hue is not None: + hue_factor = random.uniform(hue[0], hue[1]) + transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor))) + + random.shuffle(transforms) + transform = Compose([ + ToPILImage('RGB'), + *transforms, + ToTensor() + ]) + factors = [ + brightness_factor, contrast_factor, saturation_factor, hue_factor + ] + return transform, factors + + def forward(self, x): + img = [x_[0].cpu() for x_ in torch.split(x, 1, dim=0)] + factors = [] + for i in range(len(img)): + # color jitter + transform, fac = self.get_params(self.brightness, self.contrast, + self.saturation, self.hue, self.awgn, + self.poisson, self.blur) + img[i] = transform(img[i]) + # noise & blur + blur_factor = 0 + if self.blur is not None: + blur_factor = random.uniform(*self.blur) + img[i] = imfilter( + img[i], + torch.tensor(gaussian_kernel(15, blur_factor), + device=img[i].device), + self.blur_padding)[0] + awgn_factor = 0 + if self.awgn is not None: + awgn_factor = random.uniform(*self.awgn) + img[i] += gaussian_noise(img[i], stddev=awgn_factor, channel_wise=False) + poisson_factor = 0 + if self.poisson is not None: + poisson_factor = random.uniform(*self.poisson) + img[i] += poisson_noise(img[i], stddev=poisson_factor) + fac += [awgn_factor, poisson_factor, blur_factor] + factors.append(torch.tensor(fac)) + img[i] = img[i].clamp(0, 1) + return torch.stack(img).to(x.device), torch.stack(factors).to(x.device) diff --git a/VSR/Backend/Torch/Models/Ops/Initializer.py b/VSR/Backend/Torch/Models/Ops/Initializer.py new file mode 100644 index 0000000..ce8e011 --- /dev/null +++ b/VSR/Backend/Torch/Models/Ops/Initializer.py @@ -0,0 +1,5 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 15 + diff --git a/VSR/Backend/Torch/Models/Loss.py b/VSR/Backend/Torch/Models/Ops/Loss.py similarity index 78% rename from VSR/Backend/Torch/Models/Loss.py rename to VSR/Backend/Torch/Models/Ops/Loss.py index 30849dd..a05120a 100644 --- a/VSR/Backend/Torch/Models/Loss.py +++ b/VSR/Backend/Torch/Models/Ops/Loss.py @@ -1,7 +1,7 @@ -# Copyright (c): Wenyi Tang 2017-2019. +# Copyright (c) 2017-2020 Wenyi Tang. 
# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/9 下午2:41 +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 16 import torch import torchvision @@ -56,17 +56,45 @@ def rgan_bce_loss(x, y, x_real_than_y: bool = True): def ragan_bce_loss(x, y, x_real_than_y: bool = True): """relativistic average GAN loss""" if x_real_than_y: - return F.binary_cross_entropy_with_logits(x - y.mean(1, keepdim=True), + return F.binary_cross_entropy_with_logits(x - y.mean(), torch.ones_like(x)) + \ - F.binary_cross_entropy_with_logits(y - x.mean(1, keepdim=True), + F.binary_cross_entropy_with_logits(y - x.mean(), torch.zeros_like(y)) else: - return F.binary_cross_entropy_with_logits(y - x.mean(1, keepdim=True), + return F.binary_cross_entropy_with_logits(y - x.mean(), torch.ones_like(x)) + \ - F.binary_cross_entropy_with_logits(x - y.mean(1, keepdim=True), + F.binary_cross_entropy_with_logits(x - y.mean(), torch.zeros_like(y)) +class GeneratorLoss(nn.Module): + def __init__(self, name='GAN'): + self.type = name + super(GeneratorLoss, self).__init__() + + def forward(self, x, y=None): + if self.type == 'RGAN': + return rgan_bce_loss(x, y, True) + elif self.type == 'RAGAN': + return ragan_bce_loss(x, y, True) + else: + return gan_bce_loss(x, True) + + +class DiscriminatorLoss(nn.Module): + def __init__(self, name='GAN'): + self.type = name + super(DiscriminatorLoss, self).__init__() + + def forward(self, x, y=None): + if self.type == 'RGAN': + return rgan_bce_loss(x, y, False) + elif self.type == 'RAGAN': + return ragan_bce_loss(x, y, False) + else: + return gan_bce_loss(x, False) + gan_bce_loss(y, True) + + class VggFeatureLoss(nn.Module): # layer name stick to keras model _LAYER_NAME = { diff --git a/VSR/Backend/Torch/Models/Ops/Motion.py b/VSR/Backend/Torch/Models/Ops/Motion.py new file mode 100644 index 0000000..9ad78dd --- /dev/null +++ b/VSR/Backend/Torch/Models/Ops/Motion.py @@ -0,0 +1,185 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 15 + +import torch +from torch import nn +from torch.nn import functional as F + +from VSR.Util.Math import nd_meshgrid +from ...Util.Utility import irtranspose, transpose + + +class STN(nn.Module): + """Spatial transformer network. + For optical flow based frame warping. 
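+  Each output pixel samples the input at its own grid position shifted by
+  the flow: out(x, y) = in(x + u(x, y), y + v(x, y)), via `F.grid_sample`.
+
+  A minimal usage sketch (assuming `flow` is an [N, 2, H, W] tensor):
+    >>> warp = STN(padding_mode='border')
+    >>> warped = warp(ref, flow)  # flow[:, 0] = u, flow[:, 1] = v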
+
+  Args:
+    mode: sampling interpolation mode of `grid_sample`
+    padding_mode: can be `zeros` | `border`
+    normalize: flow value is normalized to [-1, 1] or absolute value
+  """
+
+  def __init__(self, mode='bilinear', padding_mode='zeros', normalize=False):
+    super(STN, self).__init__()
+    self.mode = mode
+    self.padding_mode = padding_mode
+    self.norm = normalize
+
+  def forward(self, inputs, u, v=None, gain=1):
+    batch = inputs.size(0)
+    device = inputs.device
+    mesh = nd_meshgrid(*inputs.shape[-2:], permute=[1, 0])
+    mesh = torch.tensor(mesh, dtype=torch.float32, device=device)
+    mesh = mesh.unsqueeze(0).repeat_interleave(batch, dim=0)
+    # add flow to mesh
+    if v is None:
+      assert u.shape[1] == 2, "optical flow must have 2 channels"
+      _u, _v = u[:, 0], u[:, 1]
+    else:
+      _u, _v = u, v
+    if not self.norm:
+      # flow needs to normalize to [-1, 1]
+      h, w = inputs.shape[-2:]
+      _u = _u / w * 2
+      _v = _v / h * 2
+    flow = torch.stack([_u, _v], dim=-1) * gain
+    assert flow.shape == mesh.shape, \
+      f"Shape mis-match: {flow.shape} != {mesh.shape}"
+    mesh = mesh + flow
+    return F.grid_sample(inputs, mesh,
+                         mode=self.mode, padding_mode=self.padding_mode)
+
+
+class STTN(nn.Module):
+  """Spatio-temporal transformer network. (ECCV 2018)
+
+  Args:
+    transpose_ncthw: how input tensor be transposed to format NCTHW
+    mode: sampling interpolation mode of `grid_sample`
+    padding_mode: can be `zeros` | `border`
+    normalize: flow value is normalized to [-1, 1] or absolute value
+  """
+
+  def __init__(self, transpose_ncthw=(0, 1, 2, 3, 4),
+               normalize=False, mode='bilinear', padding_mode='zeros'):
+    super(STTN, self).__init__()
+    self.normalized = normalize
+    self.mode = mode
+    self.padding_mode = padding_mode
+    self.t = transpose_ncthw
+
+  def forward(self, inputs, d, u, v):
+    _error_msg = "STTN only works for 5D tensor but got {}D input!"
+    if inputs.dim() != 5:
+      raise ValueError(_error_msg.format(inputs.dim()))
+    device = inputs.device
+    batch, channel, t, h, w = (inputs.shape[i] for i in self.t)
+    mesh = nd_meshgrid(t, h, w, permute=[2, 1, 0])
+    mesh = torch.tensor(mesh, dtype=torch.float32, device=device)
+    mesh = mesh.unsqueeze(0).repeat_interleave(batch, dim=0)
+    _d, _u, _v = d, u, v
+    if not self.normalized:
+      _d = d / t * 2
+      _u = u / w * 2
+      _v = v / h * 2
+    st_flow = torch.stack([_u, _v, _d], dim=-1)
+    st_flow = st_flow.unsqueeze(1).repeat_interleave(t, dim=1)
+    assert st_flow.shape == mesh.shape, \
+      f"Shape mis-match: {st_flow.shape} != {mesh.shape}"
+    mesh = mesh + st_flow
+    inputs = transpose(inputs, self.t)
+    warp = F.grid_sample(inputs, mesh, mode=self.mode,
+                         padding_mode=self.padding_mode)
+    # STTN warps into a single frame
+    warp = warp[:, :, 0:1]
+    return irtranspose(warp, self.t)
+
+
+class CoarseFineFlownet(nn.Module):
+  def __init__(self, channel):
+    """Coarse to fine flow estimation network
+
+    Originally from paper "Real-Time Video Super-Resolution with Spatio-Temporal
+    Networks and Motion Compensation".
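+    The coarse net estimates a full-resolution flow through two stride-2
+    stages and a x4 pixel-shuffle; the fine net then refines it given the
+    coarse flow and the reference frame warped by that flow.
+
+    A usage sketch (assuming RGB frames, so `channel=3`):
+      >>> flownet = CoarseFineFlownet(channel=3)
+      >>> flow = flownet(target, ref)  # [N, 2, H, W], flow from ref to target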
+ See Vespcn.py + """ + + super(CoarseFineFlownet, self).__init__() + in_c = channel * 2 + # Coarse Flow + conv1 = nn.Sequential(nn.Conv2d(in_c, 24, 5, 2, 2), nn.ReLU(True)) + conv2 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) + conv3 = nn.Sequential(nn.Conv2d(24, 24, 5, 2, 2), nn.ReLU(True)) + conv4 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) + conv5 = nn.Sequential(nn.Conv2d(24, 32, 3, 1, 1), nn.Tanh()) + up1 = nn.PixelShuffle(4) + self.coarse_flow = nn.Sequential(conv1, conv2, conv3, conv4, conv5, up1) + # Fine Flow + in_c = channel * 3 + 2 + conv1 = nn.Sequential(nn.Conv2d(in_c, 24, 5, 2, 2), nn.ReLU(True)) + conv2 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) + conv3 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) + conv4 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) + conv5 = nn.Sequential(nn.Conv2d(24, 8, 3, 1, 1), nn.Tanh()) + up2 = nn.PixelShuffle(2) + self.fine_flow = nn.Sequential(conv1, conv2, conv3, conv4, conv5, up2) + self.warp_c = STN(padding_mode='border') + + def forward(self, target, ref, gain=1): + """Estimate optical flow from `ref` frame to `target` frame""" + + flow_c = self.coarse_flow(torch.cat((ref, target), 1)) + wc = self.warp_c(ref, flow_c[:, 0], flow_c[:, 1]) + flow_f = self.fine_flow(torch.cat((ref, target, flow_c, wc), 1)) + flow_c + flow_f *= gain + return flow_f + + +class Flownet(nn.Module): + def __init__(self, channel): + """Flow estimation network + + Originally from paper "FlowNet: Learning Optical Flow with Convolutional + Networks" and adapted according to paper "Frame-Recurrent Video + Super-Resolution". + See Frvsr.py + + Args: + channel: input channels of each sequential frame + """ + + super(Flownet, self).__init__() + f = 32 + layers = [] + in_c = channel * 2 + for i in range(3): + layers += [nn.Conv2d(in_c, f, 3, 1, 1), nn.LeakyReLU(0.2, inplace=True)] + layers += [nn.Conv2d(f, f, 3, 1, 1), nn.LeakyReLU(0.2, inplace=True)] + layers += [nn.MaxPool2d(2)] + in_c = f + f *= 2 + for i in range(3): + layers += [nn.Conv2d(in_c, f, 3, 1, 1), nn.LeakyReLU(0.2, inplace=True)] + layers += [nn.Conv2d(f, f, 3, 1, 1), nn.LeakyReLU(0.2, inplace=True)] + layers += [ + nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)] + in_c = f + f //= 2 + layers += [nn.Conv2d(in_c, f, 3, 1, 1), nn.LeakyReLU(0.2, inplace=True)] + layers += [nn.Conv2d(f, 2, 3, 1, 1), nn.Tanh()] + self.body = nn.Sequential(*layers) + + def forward(self, target, ref, gain=1): + """Estimate densely optical flow from `ref` to `target` + + Args: + target: frame A + ref: frame B + gain: a scalar multiplied to final flow map + """ + + x = torch.cat((target, ref), 1) + x = self.body(x) * gain + return x diff --git a/VSR/Backend/Torch/Models/Ops/Scale.py b/VSR/Backend/Torch/Models/Ops/Scale.py new file mode 100644 index 0000000..0563ac1 --- /dev/null +++ b/VSR/Backend/Torch/Models/Ops/Scale.py @@ -0,0 +1,160 @@ +# Copyright (c) 2017-2020 Wenyi Tang. 
+# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 15 + +import torch.nn as nn +import torch.nn.functional as F + +from .Blocks import Activation, EasyConv2d + + +class _UpsampleNearest(nn.Module): + def __init__(self, scale): + super(_UpsampleNearest, self).__init__() + self.scale = scale + + def forward(self, x, scale=None): + scale = scale or self.scale + return F.interpolate(x, scale_factor=scale) + + +class _UpsampleLinear(nn.Module): + def __init__(self, scale): + super(_UpsampleLinear, self).__init__() + self._mode = ('linear', 'bilinear', 'trilinear') + self.scale = scale + + def forward(self, x, scale=None): + scale = scale or self.scale + mode = self._mode[x.dim() - 3] + return F.interpolate(x, scale_factor=scale, mode=mode, align_corners=False) + + +class Upsample(nn.Module): + def __init__(self, channel, scale, method='ps', name='Upsample', **kwargs): + super(Upsample, self).__init__() + self.name = name + self.channel = channel + self.scale = scale + self.method = method.lower() + self.group = kwargs.get('group', 1) + self.kernel_size = kwargs.get('kernel_size', 3) + + _allowed_methods = ('ps', 'nearest', 'deconv', 'linear') + assert self.method in _allowed_methods + act = kwargs.get('activation') + + samplers = [] + while scale > 1: + if scale % 2 == 1 or scale == 2: + samplers.append(self.upsampler(self.method, scale, act)) + break + else: + samplers.append(self.upsampler(self.method, 2, act)) + scale //= 2 + self.body = nn.Sequential(*samplers) + + def upsampler(self, method, scale, activation=None): + body = [] + k = self.kernel_size + if method == 'ps': + p = k // 2 # padding + s = 1 # strides + body = [nn.Conv2d(self.channel, self.channel * scale * scale, k, s, p, + groups=self.group), + nn.PixelShuffle(scale)] + if activation: + body.insert(1, Activation(activation)) + if method == 'deconv': + q = k % 2 # output padding + p = (k + q) // 2 - 1 # padding + s = scale # strides + body = [nn.ConvTranspose2d(self.channel, self.channel, k, s, p, q, + groups=self.group)] + if activation: + body.insert(1, Activation(activation)) + if method == 'nearest': + body = [_UpsampleNearest(scale), + nn.Conv2d(self.channel, self.channel, k, 1, k // 2, + groups=self.group)] + if activation: + body.append(Activation(activation)) + if method == 'linear': + body = [_UpsampleLinear(scale), + nn.Conv2d(self.channel, self.channel, k, 1, k // 2, + groups=self.group)] + if activation: + body.append(Activation(activation)) + return nn.Sequential(*body) + + def forward(self, x, **kwargs): + return self.body(x) + + def extra_repr(self): + return f"{self.name}: scale={self.scale}" + + +class MultiscaleUpsample(nn.Module): + def __init__(self, channel, scales=(2, 3, 4), **kwargs): + super(MultiscaleUpsample, self).__init__() + for i in scales: + self.__setattr__(f'up{i}', Upsample(channel, i, **kwargs)) + + def forward(self, x, scale): + return self.__getattr__(f'up{scale}')(x) + + +class SpaceToDim(nn.Module): + def __init__(self, scale_factor, dims=(-2, -1), dim=0): + super(SpaceToDim, self).__init__() + self.scale_factor = scale_factor + self.dims = dims + self.dim = dim + + def forward(self, x): + _shape = list(x.shape) + shape = _shape.copy() + dims = [x.dim() + self.dims[0] if self.dims[0] < 0 else self.dims[0], + x.dim() + self.dims[1] if self.dims[1] < 0 else self.dims[1]] + dims = [max(abs(dims[0]), abs(dims[1])), + min(abs(dims[0]), abs(dims[1]))] + if self.dim in dims: + raise RuntimeError("Integrate dimension can't be space dimension!") + shape[dims[0]] //= 
self.scale_factor + shape[dims[1]] //= self.scale_factor + shape.insert(dims[0] + 1, self.scale_factor) + shape.insert(dims[1] + 1, self.scale_factor) + dim = self.dim if self.dim < dims[1] else self.dim + 1 + dim = dim if dim <= dims[0] else dim + 1 + x = x.reshape(*shape) + perm = [dim, dims[1] + 1, dims[0] + 2] + perm = [i for i in range(min(perm))] + perm + perm.extend((i for i in range(x.dim()) if i not in perm)) + x = x.permute(*perm) + shape = _shape + shape[self.dim] *= self.scale_factor ** 2 + shape[self.dims[0]] //= self.scale_factor + shape[self.dims[1]] //= self.scale_factor + return x.reshape(*shape) + + def extra_repr(self): + return f'scale_factor={self.scale_factor}' + + +class SpaceToDepth(nn.Module): + def __init__(self, block_size): + super(SpaceToDepth, self).__init__() + self.body = SpaceToDim(block_size, dim=1) + + def forward(self, x): + return self.body(x) + + +class SpaceToBatch(nn.Module): + def __init__(self, block_size): + super(SpaceToBatch, self).__init__() + self.body = SpaceToDim(block_size, dim=0) + + def forward(self, x): + return self.body(x) diff --git a/VSR/Backend/Torch/Models/Ops/__init__.py b/VSR/Backend/Torch/Models/Ops/__init__.py new file mode 100644 index 0000000..ce8e011 --- /dev/null +++ b/VSR/Backend/Torch/Models/Ops/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 15 + diff --git a/VSR/Backend/Torch/Models/Optim/SISR.py b/VSR/Backend/Torch/Models/Optim/SISR.py new file mode 100644 index 0000000..03a930a --- /dev/null +++ b/VSR/Backend/Torch/Models/Optim/SISR.py @@ -0,0 +1,210 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 16 + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..Model import SuperResolution +from ..Ops.Discriminator import DCGAN +from ..Ops.Loss import VggFeatureLoss, GeneratorLoss, DiscriminatorLoss +from ...Framework.Summary import get_writer +from ...Util import Metrics +from ...Util.Utility import pad_if_divide, upsample + + +def get_opt(opt_config, params, lr): + if opt_config is None: + return torch.optim.Adam(params, lr=lr) + if opt_config.get('name') == 'Adadelta': + kwargs = opt_config + kwargs.pop('name') + return torch.optim.Adadelta(params, lr=lr, **kwargs) + elif opt_config.get('name') == 'Adagrad': + kwargs = opt_config + kwargs.pop('name') + return torch.optim.Adagrad(params, lr=lr, **kwargs) + elif opt_config.get('name') == 'Adam': + kwargs = opt_config + kwargs.pop('name') + return torch.optim.Adam(params, lr=lr, **kwargs) + elif opt_config.get('name') == 'SparseAdam': + kwargs = opt_config + kwargs.pop('name') + return torch.optim.SparseAdam(params, lr=lr, **kwargs) + elif opt_config.get('name') == 'Adamax': + kwargs = opt_config + kwargs.pop('name') + return torch.optim.Adamax(params, lr=lr, **kwargs) + elif opt_config.get('name') == 'ASGD': + kwargs = opt_config + kwargs.pop('name') + return torch.optim.ASGD(params, lr=lr, **kwargs) + elif opt_config.get('name') == 'SGD': + kwargs = opt_config + kwargs.pop('name') + return torch.optim.SGD(params, lr=lr, **kwargs) + elif opt_config.get('name') == 'LBFGS': + kwargs = opt_config + kwargs.pop('name') + return torch.optim.LBFGS(params, lr=lr, **kwargs) + elif opt_config.get('name') == 'Rprop': + kwargs = opt_config + kwargs.pop('name') + return torch.optim.Rprop(params, lr=lr, **kwargs) + elif opt_config.get('name') == 'RMSprop': + kwargs = opt_config + 
kwargs.pop('name')
+    return torch.optim.RMSprop(params, lr=lr, **kwargs)
+  raise ValueError(f"Unknown optimizer: {opt_config.get('name')}")
+
+
+def get_pix_cri(cri_config=None):
+  if cri_config is None:
+    return nn.L1Loss()
+  elif cri_config.get('name') == 'L1':
+    return nn.L1Loss()
+  elif cri_config.get('name') in ('L2', 'MSE'):
+    return nn.MSELoss()
+  else:
+    return nn.L1Loss()
+
+
+class L1Optimizer(SuperResolution):
+  def __init__(self, scale=1, channel=3, **kwargs):
+    super(L1Optimizer, self).__init__(scale, channel)
+    # gradient clip
+    self.clip = kwargs.get('clip')
+    # default use Adam with beta1=0.9 and beta2=0.999
+    self.opt = get_opt(kwargs.get('opt'), self.trainable_variables(), 1e-4)
+    self.padding = kwargs.get('padding', 0)
+
+  def fn(self, x):
+    raise NotImplementedError
+
+  def train(self, inputs, labels, learning_rate=None):
+    sr = self.fn(inputs[0])
+    loss = F.l1_loss(sr, labels[0])
+    if learning_rate:
+      for param_group in self.opt.param_groups:
+        param_group["lr"] = learning_rate
+    self.opt.zero_grad()
+    loss.backward()
+    if self.clip:
+      torch.nn.utils.clip_grad_norm_(self.trainable_variables(), self.clip)
+    self.opt.step()
+    return {'l1': loss.detach().cpu().numpy()}
+
+  def eval(self, inputs, labels=None, **kwargs):
+    metrics = {}
+    _lr = inputs[0]
+    if self.padding:
+      lr = pad_if_divide(_lr, self.padding)
+      a = lr.size(2) - _lr.size(2)
+      b = lr.size(3) - _lr.size(3)
+      slice_h = slice(None) if a == 0 else slice(a // 2, -a // 2)
+      slice_w = slice(None) if b == 0 else slice(b // 2, -b // 2)
+      sr = self.fn(lr)[..., slice_h, slice_w]
+    else:
+      sr = self.fn(_lr)
+    sr = sr.cpu().detach()
+    if labels is not None:
+      metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy())
+      writer = get_writer(self.name)
+      if writer is not None:
+        step = kwargs.get('epoch')
+        writer.image('sr', sr.clamp(0, 1), max=1, step=step)
+        writer.image('gt', labels[0], max=1, step=step)
+    return [sr.numpy()], metrics
+
+  def export(self, export_dir):
+    """An example of how to export ONNX format"""
+
+    # ONNX needs an input placeholder to trace the model;
+    # the fixed 48x48 shape below is arbitrary.
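+    # A typical call (a sketch; `export_dir` is assumed to be a pathlib.Path):
+    #   net.export(Path('./onnx'))  # writes ./onnx/<name>.onnx
+    # `modules` is an OrderedDict of registered sub-modules, so
+    # popitem(last=False) below takes (and removes) the first one.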
+ + name, model = self.modules.popitem(last=False) + device = list(model.parameters())[0].device + inputs = torch.randn(1, self.channel, 48, 48, device=device) + torch.onnx.export(model, (inputs,), export_dir / f'{name}.onnx') + + +class PerceptualOptimizer(L1Optimizer): + """Predefined optimizer framework for SISR task in name of `SRGAN` manner + + Implement `fn` function in subclass + """ + + def __init__(self, scale, channel, image_weight=1, feature_weight=0, + gan_weight=0, patch_size=128, **kwargs): + super(PerceptualOptimizer, self).__init__(scale, channel, **kwargs) + self.use_vgg = feature_weight > 0 + self.use_gan = gan_weight > 0 + if self.use_vgg: + # tricks: do not save weights of vgg + feature_lists = kwargs.get('vgg_features', ['block5_conv4']) + self.feature = [VggFeatureLoss(feature_lists, True)] + if self.use_gan: + # define D-net + self.dnet = DCGAN(3, 8, norm='BN', favor='C') + self.optd = torch.optim.Adam(self.trainable_variables('dnet'), 1e-4) + # image, vgg, gan + self.w = [image_weight, feature_weight, gan_weight] + self.pixel_cri = get_pix_cri(kwargs.get('cri_image')) + self.gen_cri = GeneratorLoss(kwargs.get('cri_gan', 'GAN')) + self.disc_cri = DiscriminatorLoss(kwargs.get('cri_gan', 'GAN')) + + def cuda(self): + super(PerceptualOptimizer, self).cuda() + if self.use_vgg > 0: + self.feature[0].cuda() + + def train(self, inputs, labels, learning_rate=None): + sr = self.fn(inputs[0]) + for opt in self.opts.values(): + if learning_rate: + for param_group in opt.param_groups: + param_group["lr"] = learning_rate + image_loss = self.pixel_cri(sr, labels[0]) + loss = image_loss * self.w[0] + log = { + 'image': image_loss.detach().cpu().numpy() + } + if self.use_vgg: + self.feature[0].eval() + feat_fake = self.feature[0](sr)[0] + feat_real = self.feature[0](labels[0])[0].detach() + feature_loss = self.pixel_cri(feat_fake, feat_real) + loss += feature_loss * self.w[1] + log.update(feature=feature_loss.detach().cpu().numpy()) + if self.use_gan: + for p in self.dnet.parameters(): + p.requires_grad = False + fake = self.dnet(sr) + real = self.dnet(labels[0]).detach() + gen_loss = self.gen_cri(fake, real) + loss += gen_loss * self.w[2] + log.update(gen=gen_loss.detach().cpu().numpy()) + # update G + self.opt.zero_grad() + loss.backward() + if self.clip: + clip = self.clip / learning_rate + torch.nn.utils.clip_grad_norm_(self.trainable_variables(), clip) + self.opt.step() + if self.use_gan: + # update D + for p in self.dnet.parameters(): + p.requires_grad = True + disc_fake = self.dnet(sr.detach()) + disc_real = self.dnet(labels[0]) + disc_loss = self.disc_cri(disc_fake, disc_real) + self.optd.zero_grad() + disc_loss.backward() + self.optd.step() + log.update(disc=disc_loss.detach().cpu().numpy()) + return log diff --git a/VSR/Backend/Torch/Models/Qprn.py b/VSR/Backend/Torch/Models/Qprn.py index 546e3c9..8a311d6 100644 --- a/VSR/Backend/Torch/Models/Qprn.py +++ b/VSR/Backend/Torch/Models/Qprn.py @@ -9,12 +9,13 @@ import torchvision from torch import nn -from .Arch import CascadeRdn, Rdb, SpaceToDepth, Upsample from .Crdn import Upsample as RsrUp -from .Discriminator import DCGAN -from .Loss import gan_bce_loss, total_variance from .Model import SuperResolution -from .video.motion import STTN +from .Ops.Blocks import CascadeRdn, Rdb +from .Ops.Discriminator import DCGAN +from .Ops.Loss import gan_bce_loss, total_variance +from .Ops.Motion import STTN +from .Ops.Scale import SpaceToDepth, Upsample from ..Framework.Summary import get_writer from ..Framework.Trainer import SRTrainer, 
from_tensor, to_tensor from ..Util import Metrics @@ -25,19 +26,19 @@ class Fnet(nn.Module): def __init__(self, channel, L=2, gain=64): super(Fnet, self).__init__() self.lq_entry = nn.Sequential( - nn.Conv2d(channel * (L + 1), 16, 3, 1, 1), - SpaceToDepth(4), - nn.Conv2d(256, 64, 1, 1, 0), - Rdb(64), Rdb(64)) + nn.Conv2d(channel * (L + 1), 16, 3, 1, 1), + SpaceToDepth(4), + nn.Conv2d(256, 64, 1, 1, 0), + Rdb(64), Rdb(64)) self.hq_entry = nn.Sequential( - nn.Conv2d(channel * L, 16, 3, 1, 1), - SpaceToDepth(4), - nn.Conv2d(256, 64, 1, 1, 0), - Rdb(64), Rdb(64)) + nn.Conv2d(channel * L, 16, 3, 1, 1), + SpaceToDepth(4), + nn.Conv2d(256, 64, 1, 1, 0), + Rdb(64), Rdb(64)) self.flownet = nn.Sequential( - nn.Conv2d(128, 64, 1, 1, 0), - Rdb(64), Rdb(64), Upsample(64, 4), - nn.Conv2d(64, 3, 3, 1, 1), nn.Tanh()) + nn.Conv2d(128, 64, 1, 1, 0), + Rdb(64), Rdb(64), Upsample(64, 4), + nn.Conv2d(64, 3, 3, 1, 1), nn.Tanh()) gain = torch.as_tensor([L, gain, gain], dtype=torch.float32) self.gain = gain.reshape(1, 3, 1, 1) @@ -56,11 +57,11 @@ class Unet(nn.Module): def __init__(self, channel, N=2): super(Unet, self).__init__() self.entry = nn.Sequential( - nn.Conv2d(channel * N, 32, 3, 1, 1), - SpaceToDepth(2), - nn.Conv2d(128, 32, 1, 1, 0)) + nn.Conv2d(channel * N, 32, 3, 1, 1), + SpaceToDepth(2), + nn.Conv2d(128, 32, 1, 1, 0)) self.exit = nn.Sequential( - Upsample(32, 2), nn.Conv2d(32, channel, 3, 1, 1)) + Upsample(32, 2), nn.Conv2d(32, channel, 3, 1, 1)) self.down1 = nn.Conv2d(32, 64, 3, 2, 1) self.up1 = RsrUp([64, 32]) self.cb = CascadeRdn(64, 3, True) @@ -240,8 +241,10 @@ def eval(self, inputs, labels=None, **kwargs): c = idr_lq.shape[-1] - idr_lq_.shape[-1] a, b = a // 2, -a // 2 c, d = c // 2, -c // 2 - if a == 0: a = b = None - if c == 0: c = d = None + if a == 0: + a = b = None + if c == 0: + c = d = None idr = self.qprn.refiner(idr_lq, idr_lq) length = self.qprn.L + 1 windows = { @@ -265,7 +268,7 @@ def eval(self, inputs, labels=None, **kwargs): windows['predict'].append(hq_warp.detach().cpu().numpy()[..., a:b, c:d]) elif self.debug.get('see_flow'): windows['predict'].append(torch.stack( - flow[1:], dim=1).detach().cpu().numpy()[..., a:b, c:d]) + flow[1:], dim=1).detach().cpu().numpy()[..., a:b, c:d]) else: windows['predict'].append(hq.detach().cpu().numpy()[..., a:b, c:d]) time_loss += F.mse_loss(hq, hq_warp).detach() diff --git a/VSR/Backend/Torch/Models/Rbpn.py b/VSR/Backend/Torch/Models/Rbpn.py index 350d471..5815ed1 100644 --- a/VSR/Backend/Torch/Models/Rbpn.py +++ b/VSR/Backend/Torch/Models/Rbpn.py @@ -3,32 +3,124 @@ # Email: wenyi.tang@intel.com # Update Date: 2019/5/26 下午3:24 +import logging + import torch import torch.nn.functional as F from torch import nn -from .Loss import total_variance +from .Dbpn import Dbpn, DownBlock, UpBlock from .Model import SuperResolution -from .frvsr.ops import FNet -from .rbpn.ops import Rbpn -from .video.motion import STN +from .Ops.Blocks import EasyConv2d, RB +from .Ops.Loss import total_variance +from .Ops.Motion import Flownet, STN from ..Framework.Summary import get_writer from ..Util.Metrics import psnr from ..Util.Utility import pad_if_divide, upsample +_logger = logging.getLogger("VSR.RBPN") +_logger.info("LICENSE: RBPN is implemented by M. Haris, et. al. 
@alterzero") +_logger.warning( + "I use unsupervised flownet to estimate optical flow, rather than pyflow module.") + + +class DbpnS(nn.Module): + def __init__(self, scale, base_filter, feat, num_stages): + super(DbpnS, self).__init__() + kernel, stride = Dbpn.get_kernel_stride(scale) + # Initial Feature Extraction + self.feat1 = EasyConv2d(base_filter, feat, 1, activation='prelu') + # Back-projection stages + for i in range(num_stages): + self.__setattr__(f'up{i}', UpBlock(feat, kernel, stride)) + if i < num_stages - 1: + # not the last layer + self.__setattr__(f'down{i}', DownBlock(feat, kernel, stride)) + self.num_stages = num_stages + # Reconstruction + self.output_conv = EasyConv2d(feat * num_stages, feat, 1) + + def forward(self, x): + x = self.feat1(x) + h1 = [self.__getattr__('up0')(x)] + d1 = [] + for i in range(self.num_stages): + d1.append(self.__getattr__(f'down{i}')(h1[-1])) + h1.append(self.__getattr__(f'up{i + 1}')(d1[-1])) + x = self.output_conv(torch.cat(h1, 1)) + return x + + +class Rbpn(nn.Module): + def __init__(self, channel, scale, base_filter, feat, n_resblock, + nFrames): + super(Rbpn, self).__init__() + self.nFrames = nFrames + kernel, stride = Dbpn.get_kernel_stride(scale) + # Initial Feature Extraction + self.feat0 = EasyConv2d(channel, base_filter, 3, activation='prelu') + self.feat1 = EasyConv2d(8, base_filter, 3, activation='prelu') + ###DBPNS + self.DBPN = DbpnS(scale, base_filter, feat, 3) + # Res-Block1 + modules_body1 = [RB(base_filter, kernel_size=3, activation='prelu') for _ in + range(n_resblock)] + modules_body1.append( + EasyConv2d(base_filter, feat, kernel, stride, activation='prelu', + transposed=True)) + self.res_feat1 = nn.Sequential(*modules_body1) + # Res-Block2 + modules_body2 = [RB(feat, kernel_size=3, activation='prelu') for _ in + range(n_resblock)] + modules_body2.append(EasyConv2d(feat, feat, 3, activation='prelu')) + self.res_feat2 = nn.Sequential(*modules_body2) + # Res-Block3 + modules_body3 = [RB(feat, kernel_size=3, activation='prelu') for _ in + range(n_resblock)] + modules_body3.append(EasyConv2d(feat, base_filter, kernel, stride, + activation='prelu')) + self.res_feat3 = nn.Sequential(*modules_body3) + # Reconstruction + self.output = EasyConv2d((nFrames - 1) * feat, channel, 3) + + def forward(self, x, neigbor, flow): + ### initial feature extraction + feat_input = self.feat0(x) + feat_frame = [] + for j in range(len(neigbor)): + feat_frame.append(self.feat1(torch.cat((x, neigbor[j], flow[j]), 1))) + + ####Projection + Ht = [] + for j in range(len(neigbor)): + h0 = self.DBPN(feat_input) + h1 = self.res_feat1(feat_frame[j]) + + e = h0 - h1 + e = self.res_feat2(e) + h = h0 + e + Ht.append(h) + feat_input = self.res_feat3(h) + + ####Reconstruction + out = torch.cat(Ht, 1) + output = self.output(out) + + return output + class Composer(nn.Module): def __init__(self, **kwargs): super(Composer, self).__init__() self.module = Rbpn(**kwargs) - self.fnet = FNet(kwargs['num_channels']) + self.fnet = Flownet(kwargs['num_channels']) self.warper = STN(padding_mode='border') def forward(self, target, neighbors): flows = [] warps = [] for i in neighbors: - flow = self.fnet(target, i) + flow = self.fnet(target, i, gain=32) warp = self.warper(i, flow[:, 0], flow[:, 1]) flows.append(flow) warps.append(warp) diff --git a/VSR/Backend/Torch/Models/Rcan.py b/VSR/Backend/Torch/Models/Rcan.py index ee3cf2d..7515d17 100644 --- a/VSR/Backend/Torch/Models/Rcan.py +++ b/VSR/Backend/Torch/Models/Rcan.py @@ -3,23 +3,74 @@ # Email: wenyi.tang@intel.com # Update 
Date: 2019 - 3 - 15 +import logging + import torch +import torch.nn as nn import torch.nn.functional as F from .Model import SuperResolution -from .rcan import rcan +from .Ops.Blocks import EasyConv2d, MeanShift, Rcab +from .Ops.Scale import Upsample from ..Util import Metrics -from VSR.Util.Config import Config +_logger = logging.getLogger("VSR.RCAN") +_logger.info("LICENSE: RCAN is implemented by Yulun Zhang. " + "@yulunzhang https://github.com/yulunzhang/RCAN.") + + +class ResidualGroup(nn.Module): + def __init__(self, n_feat, kernel_size, reduction, n_resblocks): + super(ResidualGroup, self).__init__() + modules_body = [Rcab(n_feat, reduction, kernel_size=kernel_size) for _ in + range(n_resblocks)] + modules_body.append(EasyConv2d(n_feat, n_feat, kernel_size)) + self.body = nn.Sequential(*modules_body) + + def forward(self, x): + res = self.body(x) + res += x + return res -class RCAN(SuperResolution): - def __init__(self, scale, **kwargs): - super(RCAN, self).__init__(scale, 3) - args = Config(kwargs) - args.scale = [scale] - self.rgb_range = args.rgb_range - self.rcan = rcan.RCAN(args) +class Rcan(nn.Module): + def __init__(self, channel, scale, n_resgroups, n_resblocks, n_feats, + reduction, rgb_range): + super(Rcan, self).__init__() + # RGB mean for DIV2K + rgb_mean = (0.4488, 0.4371, 0.4040) + self.sub_mean = MeanShift(rgb_mean, True, rgb_range) + # define head module + modules_head = [EasyConv2d(channel, n_feats, 3)] + # define body module + modules_body = [ + ResidualGroup(n_feats, 3, reduction, n_resblocks) for _ in + range(n_resgroups)] + modules_body.append(EasyConv2d(n_feats, n_feats, 3)) + # define tail module + modules_tail = [Upsample(n_feats, scale), + EasyConv2d(n_feats, channel, 3)] + self.add_mean = MeanShift(rgb_mean, False, rgb_range) + self.head = nn.Sequential(*modules_head) + self.body = nn.Sequential(*modules_body) + self.tail = nn.Sequential(*modules_tail) + + def forward(self, x): + x = self.sub_mean(x) + x = self.head(x) + res = self.body(x) + x + x = self.tail(res) + x = self.add_mean(x) + return x + + +class RCAN(SuperResolution): + def __init__(self, channel, scale, n_resgroups, n_resblocks, n_feats, + reduction, **kwargs): + super(RCAN, self).__init__(scale, channel) + self.rgb_range = kwargs.get('rgb_range', 255) + self.rcan = Rcan(channel, scale, n_resgroups, n_resblocks, n_feats, + reduction, self.rgb_range) self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) def train(self, inputs, labels, learning_rate=None): diff --git a/VSR/Backend/Torch/Models/SRFeat.py b/VSR/Backend/Torch/Models/SRFeat.py index 76eee73..5a1bf11 100644 --- a/VSR/Backend/Torch/Models/SRFeat.py +++ b/VSR/Backend/Torch/Models/SRFeat.py @@ -8,17 +8,49 @@ # Email: wenyi.tang@intel.com # Update Date: 2019 - 3 - 15 +import logging + import numpy as np import torch import torch.nn.functional as F -from . import Discriminator as disc -from .Loss import VggFeatureLoss, gan_bce_loss from .Model import SuperResolution -from .srfeat import ops +from .Ops.Blocks import EasyConv2d, RB +from .Ops.Scale import Upsample +from .Ops.Discriminator import DCGAN +from .Ops.Loss import VggFeatureLoss, gan_bce_loss from ..Framework.Summary import get_writer from ..Util import Metrics +_logger = logging.getLogger("VSR.SRFEAT") +_logger.info("LICENSE: SRFeat is proposed by S. Park, et. al. 
" + "Implemented via PyTorch by @LoSealL.") + + +class Generator(torch.nn.Module): + """ Generator for SRFeat: + Single Image Super-Resolution with Feature Discrimination (ECCV 2018) + """ + + def __init__(self, channel, scale, filters, num_rb): + super(Generator, self).__init__() + self.head = EasyConv2d(channel, filters, 9) + for i in range(num_rb): + setattr(self, f'rb_{i:02d}', RB(filters, 3, 'lrelu', use_bn=True)) + setattr(self, f'merge_{i:02d}', EasyConv2d(filters, filters, 1)) + self.tail = torch.nn.Sequential( + Upsample(filters, scale), EasyConv2d(filters, channel, 3)) + self.num_rb = num_rb + + def forward(self, inputs): + x = self.head(inputs) + feat = [] + for i in range(self.num_rb): + x = getattr(self, f'rb_{i:02d}')(x) + feat.append(getattr(self, f'merge_{i:02d}')(x)) + x = self.tail(x + torch.stack(feat, dim=0).sum(0).squeeze(0)) + return x + class SRFEAT(SuperResolution): def __init__(self, channel, scale, patch_size=64, weights=(1, 0.01, 0.01), @@ -28,15 +60,15 @@ def __init__(self, channel, scale, patch_size=64, weights=(1, 0.01, 0.01), f = kwargs.get('filters', 64) self.use_gan = weights[1] > 0 self.use_feat_gan = weights[2] > 0 - self.srfeat = ops.Generator(channel, scale, f, n_rb) + self.srfeat = Generator(channel, scale, f, n_rb) self.gopt = torch.optim.Adam(self.trainable_variables('srfeat'), 1e-4) if self.use_gan: # vanilla image - self.dnet1 = disc.DCGAN(channel, np.log2(patch_size // 4) * 2, 'bn') + self.dnet1 = DCGAN(channel, np.log2(patch_size // 4) * 2, 'bn') self.dopt1 = torch.optim.Adam(self.trainable_variables('dnet1'), 1e-4) if self.use_feat_gan: # vgg feature - self.dnet2 = disc.DCGAN(256, np.log2(patch_size // 16) * 2, 'bn') + self.dnet2 = DCGAN(256, np.log2(patch_size // 16) * 2, 'bn') self.dopt2 = torch.optim.Adam(self.trainable_variables('dnet2'), 1e-4) self.vgg = [VggFeatureLoss(['block3_conv1'], True)] self.w = weights diff --git a/VSR/Backend/Torch/Models/Sofvsr.py b/VSR/Backend/Torch/Models/Sofvsr.py index 807aca0..4e18de5 100644 --- a/VSR/Backend/Torch/Models/Sofvsr.py +++ b/VSR/Backend/Torch/Models/Sofvsr.py @@ -3,23 +3,203 @@ # Email: wenyi.tang@intel.com # Update Date: 2019/4/2 上午10:54 +import logging + import torch +import torch.nn as nn import torch.nn.functional as F from .Model import SuperResolution -from .sof.modules import SOFVSR as _SOFVSR -from .sof.modules import optical_flow_warp +from .Ops.Motion import STN +from .Ops.Blocks import EasyConv2d, Rdb from ..Util import Metrics from ..Util.Metrics import total_variance +_logger = logging.getLogger("VSR.SOF") +_logger.info("LICENSE: SOF-VSR is implemented by Longguan Wang. 
" + "@LongguanWang https://github.com/LongguangWang/SOF-VSR.") + + +class make_dense(nn.Module): + def __init__(self, channels_in, channels_out, kernel_size=3): + super(make_dense, self).__init__() + self.leaky_relu = nn.LeakyReLU(0.1, inplace=True) + self.conv = nn.Conv2d(channels_in, channels_out, kernel_size=kernel_size, + padding=(kernel_size - 1) // 2, + bias=False) + + def forward(self, x): + out = self.leaky_relu(self.conv(x)) + out = torch.cat((x, out), 1) + return out + + +class RDB(nn.Module): + def __init__(self, nDenselayer, channels, growth): + super(RDB, self).__init__() + modules = [] + channels_buffer = channels + for i in range(nDenselayer): + modules.append(make_dense(channels_buffer, growth)) + channels_buffer += growth + self.dense_layers = nn.Sequential(*modules) + self.conv_1x1 = nn.Conv2d(channels_buffer, channels, kernel_size=1, + padding=0, bias=False) + + def forward(self, x): + out = self.dense_layers(x) + out = self.conv_1x1(out) + out = out + x + return out + + +class OFRnet(nn.Module): + def __init__(self, upscale_factor): + super(OFRnet, self).__init__() + self.pool = nn.AvgPool2d(kernel_size=2) + self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', + align_corners=False) + self.final_upsample = nn.Upsample(scale_factor=upscale_factor, + mode='bilinear', align_corners=False) + self.shuffle = nn.PixelShuffle(upscale_factor) + self.upscale_factor = upscale_factor + # Level 1 + self.conv_L1_1 = nn.Conv2d(2, 32, 3, 1, 1, bias=False) + self.RDB1_1 = RDB(4, 32, 32) + self.RDB1_2 = RDB(4, 32, 32) + self.bottleneck_L1 = nn.Conv2d(64, 2, 3, 1, 1, bias=False) + self.conv_L1_2 = nn.Conv2d(2, 2, 3, 1, 1, bias=True) + # Level 2 + self.conv_L2_1 = nn.Conv2d(6, 32, 3, 1, 1, bias=False) + self.RDB2_1 = RDB(4, 32, 32) + self.RDB2_2 = RDB(4, 32, 32) + self.bottleneck_L2 = nn.Conv2d(64, 2, 3, 1, 1, bias=False) + self.conv_L2_2 = nn.Conv2d(2, 2, 3, 1, 1, bias=True) + # Level 3 + self.conv_L3_1 = nn.Conv2d(6, 32, 3, 1, 1, bias=False) + self.RDB3_1 = RDB(4, 32, 32) + self.RDB3_2 = RDB(4, 32, 32) + self.bottleneck_L3 = nn.Conv2d(64, 2 * upscale_factor ** 2, 3, 1, 1, + bias=False) + self.conv_L3_2 = nn.Conv2d(2 * upscale_factor ** 2, 2 * upscale_factor ** 2, + 3, 1, 1, bias=True) + self.warper = STN() + + def forward(self, x): + # Level 1 + x_L1 = self.pool(x) + _, _, h, w = x_L1.size() + input_L1 = self.conv_L1_1(x_L1) + buffer_1 = self.RDB1_1(input_L1) + buffer_2 = self.RDB1_2(buffer_1) + buffer = torch.cat((buffer_1, buffer_2), 1) + optical_flow_L1 = self.bottleneck_L1(buffer) + optical_flow_L1 = self.conv_L1_2(optical_flow_L1) + optical_flow_L1_upscaled = self.upsample(optical_flow_L1) # *2 + # Level 2 + x_L2 = self.warper(x[:, 0, :, :].unsqueeze(1), optical_flow_L1_upscaled, + gain=16) + x_L2_res = torch.unsqueeze(x[:, 1, :, :], dim=1) - x_L2 + x_L2 = torch.cat((x, x_L2, x_L2_res, optical_flow_L1_upscaled), 1) + input_L2 = self.conv_L2_1(x_L2) + buffer_1 = self.RDB2_1(input_L2) + buffer_2 = self.RDB2_2(buffer_1) + buffer = torch.cat((buffer_1, buffer_2), 1) + optical_flow_L2 = self.bottleneck_L2(buffer) + optical_flow_L2 = self.conv_L2_2(optical_flow_L2) + optical_flow_L2 = optical_flow_L2 + optical_flow_L1_upscaled + # Level 3 + x_L3 = self.warper(torch.unsqueeze(x[:, 0, :, :], dim=1), + optical_flow_L2, gain=16) + x_L3_res = torch.unsqueeze(x[:, 1, :, :], dim=1) - x_L3 + x_L3 = torch.cat((x, x_L3, x_L3_res, optical_flow_L2), 1) + input_L3 = self.conv_L3_1(x_L3) + buffer_1 = self.RDB3_1(input_L3) + buffer_2 = self.RDB3_2(buffer_1) + buffer = torch.cat((buffer_1, 
buffer_2), 1) + optical_flow_L3 = self.bottleneck_L3(buffer) + optical_flow_L3 = self.conv_L3_2(optical_flow_L3) + optical_flow_L3 = self.shuffle(optical_flow_L3) + self.final_upsample( + optical_flow_L2) # *4 + + return optical_flow_L3, optical_flow_L2, optical_flow_L1 + + +class SRnet(nn.Module): + def __init__(self, s, c, d): + """ + Args: + s: scale factor + c: channel numbers + d: video sequence number + """ + super(SRnet, self).__init__() + self.conv = nn.Conv2d(c * (2 * s ** 2 + d), 64, 3, 1, 1, bias=False) + self.RDB_1 = RDB(5, 64, 32) + self.RDB_2 = RDB(5, 64, 32) + self.RDB_3 = RDB(5, 64, 32) + self.RDB_4 = RDB(5, 64, 32) + self.RDB_5 = RDB(5, 64, 32) + self.bottleneck = nn.Conv2d(384, c * s ** 2, 1, 1, 0, bias=False) + self.conv_2 = nn.Conv2d(c * s ** 2, c * s ** 2, 3, 1, 1, bias=True) + self.shuffle = nn.PixelShuffle(upscale_factor=s) + + def forward(self, x): + input = self.conv(x) + buffer_1 = self.RDB_1(input) + buffer_2 = self.RDB_2(buffer_1) + buffer_3 = self.RDB_3(buffer_2) + buffer_4 = self.RDB_4(buffer_3) + buffer_5 = self.RDB_5(buffer_4) + output = torch.cat( + (buffer_1, buffer_2, buffer_3, buffer_4, buffer_5, input), 1) + output = self.bottleneck(output) + output = self.conv_2(output) + output = self.shuffle(output) + return output + + +class Sofvsr(nn.Module): + def __init__(self, scale, channel, depth): + super(Sofvsr, self).__init__() + self.upscale_factor = scale + self.c = channel + self.OFRnet = OFRnet(upscale_factor=scale) + self.SRnet = SRnet(scale, channel, depth) + self.warper = STN() + + def forward(self, x): + input_01 = torch.cat((torch.unsqueeze(x[:, 0, :, :], dim=1), + torch.unsqueeze(x[:, 1, :, :], dim=1)), 1) + input_21 = torch.cat((torch.unsqueeze(x[:, 2, :, :], dim=1), + torch.unsqueeze(x[:, 1, :, :], dim=1)), 1) + flow_01_L3, flow_01_L2, flow_01_L1 = self.OFRnet(input_01) + flow_21_L3, flow_21_L2, flow_21_L1 = self.OFRnet(input_21) + draft_cube = x + for i in range(self.upscale_factor): + for j in range(self.upscale_factor): + draft_01 = self.warper(x[:, :self.c, :, :], + flow_01_L3[:, :, i::self.upscale_factor, + j::self.upscale_factor] / self.upscale_factor, + gain=16) + draft_21 = self.warper(x[:, self.c * 2:, :, :], + flow_21_L3[:, :, i::self.upscale_factor, + j::self.upscale_factor] / self.upscale_factor, + gain=16) + draft_cube = torch.cat((draft_cube, draft_01, draft_21), 1) + output = self.SRnet(draft_cube) + return output, (flow_01_L3, flow_01_L2, flow_01_L1), ( + flow_21_L3, flow_21_L2, flow_21_L1) + class SOFVSR(SuperResolution): """Note: SOF is Y-channel SR with depth=3""" def __init__(self, scale, channel, depth=3, **kwargs): super(SOFVSR, self).__init__(scale, channel, **kwargs) - self.sof = _SOFVSR(scale, channel, depth) + self.sof = Sofvsr(scale, channel, depth) self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) + self.warper = STN() assert depth == 3 self.center = depth // 2 @@ -39,12 +219,12 @@ def train(self, inputs, labels, learning_rate=None): cur_d = F.avg_pool2d(cur, 2) nxt_d = F.avg_pool2d(nxt, 2) - pre_d_warp = optical_flow_warp(pre_d, flow01[2]) - pre_warp = optical_flow_warp(pre, flow01[1]) - hrp_warp = optical_flow_warp(hrp, flow01[0]) - nxt_d_warp = optical_flow_warp(nxt_d, flow21[2]) - nxt_warp = optical_flow_warp(nxt, flow21[1]) - hrn_warp = optical_flow_warp(hrn, flow21[0]) + pre_d_warp = self.warper(pre_d, flow01[2], gain=16) + pre_warp = self.warper(pre, flow01[1], gain=16) + hrp_warp = self.warper(hrp, flow01[0], gain=16) + nxt_d_warp = self.warper(nxt_d, flow21[2], gain=16) + nxt_warp = 
self.warper(nxt, flow21[1], gain=16) + hrn_warp = self.warper(hrn, flow21[0], gain=16) loss_lvl1 = F.mse_loss(pre_d_warp, cur_d) + F.mse_loss(nxt_d_warp, cur_d) + \ 0.01 * (total_variance(flow01[2]) + total_variance(flow21[2])) diff --git a/VSR/Backend/Torch/Models/Spmc.py b/VSR/Backend/Torch/Models/Spmc.py index dd7781f..38da03b 100644 --- a/VSR/Backend/Torch/Models/Spmc.py +++ b/VSR/Backend/Torch/Models/Spmc.py @@ -3,16 +3,130 @@ # Email: wenyi.tang@intel.com # Update Date: 2019/5/26 下午12:49 +import logging + import torch +import torch.nn as nn from torch.nn import functional as F -from .Loss import total_variance from .Model import SuperResolution -from .spmc.ops import DetailRevealer +from .Ops.Blocks import Conv2dLSTMCell, EasyConv2d +from .Ops.Loss import total_variance +from .Ops.Motion import CoarseFineFlownet, STN from ..Framework.Summary import get_writer from ..Util.Metrics import psnr from ..Util.Utility import pad_if_divide, upsample +_logger = logging.getLogger("VSR.SPMC") +_logger.info("LICENSE: SPMC is proposed by X. Tao, et. al. " + "Implemented via PyTorch by @LoSealL.") +_logger.info("LICENSE: ConvLSTM is implemented by @Kaixhin.") + + +class ZeroUpsample(nn.Module): + def __init__(self, scale_factor): + super(ZeroUpsample, self).__init__() + self.ps = nn.PixelShuffle(scale_factor) + self.scale = scale_factor + + def forward(self, x): + z = torch.zeros_like(x).repeat_interleave(self.scale ** 2 - 1, dim=1) + x = torch.cat((x, z), dim=1) + return self.ps(x) + + +class SubPixelMotionCompensation(nn.Module): + def __init__(self, scale): + super(SubPixelMotionCompensation, self).__init__() + self.zero_up = ZeroUpsample(scale) + self.warper = STN() + self.scale = scale + + def forward(self, x, u=0, v=0, flow=None): + if flow is not None: + u = flow[:, 0] + v = flow[:, 1] + x2 = self.zero_up(x) + u2 = self.zero_up(u.unsqueeze(1)) * self.scale + v2 = self.zero_up(v.unsqueeze(1)) * self.scale + return self.warper(x2, u2.squeeze(1), v2.squeeze(1)) + + +class MotionEstimation(nn.Module): + def __init__(self, channel, gain=32): + super(MotionEstimation, self).__init__() + self.gain = gain + self.flownet = CoarseFineFlownet(channel) + + def forward(self, target, ref, to_tuple=None): + flow = self.flownet(target, ref, self.gain) + if to_tuple: + return flow[:, 0], flow[:, 1] + return flow + + +class DetailFusion(nn.Module): + def __init__(self, channel, base_filter): + super(DetailFusion, self).__init__() + f = base_filter + self.enc1 = EasyConv2d(channel, f, 5, activation='relu') + self.enc2 = nn.Sequential( + EasyConv2d(f, f * 2, 3, 2, activation='relu'), + EasyConv2d(f * 2, f * 2, 3, activation='relu')) + self.enc3 = EasyConv2d(f * 2, f * 4, 3, 2, activation='relu') + self.lstm = Conv2dLSTMCell(f * 4, f * 4, 3, 1, 1) + self.dec1 = nn.Sequential( + EasyConv2d(f * 4, f * 4, 3, activation='relu'), + nn.ConvTranspose2d(f * 4, f * 2, 4, 2, 1), + nn.ReLU(True)) + self.dec2 = nn.Sequential( + EasyConv2d(f * 2, f * 2, 3, activation='relu'), + nn.ConvTranspose2d(f * 2, f, 4, 2, 1), + nn.ReLU(True)) + self.dec3 = nn.Sequential( + EasyConv2d(f, f, 3, activation='relu'), + EasyConv2d(f, channel, 5)) + + def forward(self, x, hx): + add1 = self.enc1(x) + add2 = self.enc2(add1) + h0 = self.enc3(add2) + x, hx = self.lstm(h0, hx) + x = self.dec1(x) + x = self.dec2(x + add2) + x = self.dec3(x + add1) + return x, hx + + +class DetailRevealer(nn.Module): + def __init__(self, scale, channel, **kwargs): + super(DetailRevealer, self).__init__() + self.base_filter = kwargs.get('base_filter', 32) + self.me 
= MotionEstimation(channel, gain=kwargs.get('gain', 32)) + self.spmc = SubPixelMotionCompensation(scale) + self.vsr = DetailFusion(channel, self.base_filter) + self.scale = scale + self.hidden_state = None + + def reset(self): + self.hidden_state = None + + def forward(self, target, ref): + flow = self.me(target, ref) + hr_ref = self.spmc(ref, flow=flow) + hr_target = upsample(target, self.scale) + if self.hidden_state is None: + batch, _, height, width = hr_ref.shape + hidden_shape = (batch, self.base_filter * 4, height // 4, width // 4) + hx = (torch.zeros(hidden_shape, device=ref.device), + torch.zeros(hidden_shape, device=ref.device)) + else: + hx = self.hidden_state + res, hx = self.vsr(hr_ref, hx) + sr = hr_target + res + self.hidden_state = hx + return sr, flow + class SPMC(SuperResolution): def __init__(self, scale, channel, stage, lambda1, lambda2, residual, @@ -73,7 +187,7 @@ def eval(self, inputs, labels=None, **kwargs): self.spmc.reset() frames = [x.squeeze(1) for x in inputs[0].split(1, dim=1)] center = len(frames) // 2 - _frames = [pad_if_divide(x, 8, 'reflect') for x in frames] + _frames = [pad_if_divide(x, 12, 'reflect') for x in frames] target = _frames[center] a = (target.size(2) - frames[0].size(2)) * self.scale b = (target.size(3) - frames[0].size(3)) * self.scale diff --git a/VSR/Backend/Torch/Models/Srmd.py b/VSR/Backend/Torch/Models/Srmd.py index 9deb0b3..a24fa41 100644 --- a/VSR/Backend/Torch/Models/Srmd.py +++ b/VSR/Backend/Torch/Models/Srmd.py @@ -3,35 +3,68 @@ # Email: wenyitang@outlook.com # Update: 2020 - 2 - 11 +import logging + import numpy as np import torch -import torch.nn.functional as F +import torch.nn as nn -from VSR.Util.Math import gaussian_kernel, anisotropic_gaussian_kernel -from .Model import SuperResolution -from .srmd import ops, pca -from ..Framework.Summary import get_writer -from ..Util.Metrics import psnr +from VSR.Util.Math import anisotropic_gaussian_kernel, gaussian_kernel +from VSR.Util.PcaPrecompute import get_degradation +from .Ops.Blocks import EasyConv2d +from .Optim.SISR import PerceptualOptimizer from ..Util.Utility import imfilter +logging.getLogger("VSR.SRFEAT").info( + "LICENSE: SRMD is proposed by Kai Zhang, et. al. " + "Implemented via PyTorch by @LoSealL.") + + +class Net(nn.Module): + """ + SRMD CNN network. 
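+  Degradation maps (a PCA-projected blur kernel and a noise level) are tiled
+  spatially and concatenated to the LR input as extra channels, so a single
+  model covers many degradations. The trunk is plain: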
+  12 conv layers
+  """
+
+  def __init__(self, scale=4, channels=3, layers=12, filters=128,
+               pca_length=15):
+    super(Net, self).__init__()
+    self.pca_length = pca_length
+    net = [EasyConv2d(channels + pca_length + 1, filters, 3, activation='relu')]
+    net += [EasyConv2d(filters, filters, 3, activation='relu') for _ in
+            range(layers - 2)]
+    net += [EasyConv2d(filters, channels * scale ** 2, 3),
+            nn.PixelShuffle(scale)]
+    self.body = nn.Sequential(*net)
+
+  def forward(self, x, kernel=None, noise=None):
+    if kernel is None and noise is None:
+      kernel = torch.zeros(x.shape[0], self.pca_length, 1, device=x.device,
+                           dtype=x.dtype)
+      noise = torch.zeros(x.shape[0], 1, 1, device=x.device, dtype=x.dtype)
+    # degradation parameter
+    degpar = torch.cat([kernel, noise.reshape([-1, 1, 1])], dim=1)
+    degpar = degpar.reshape([-1, 1 + self.pca_length, 1, 1])
+    degpar = torch.ones_like(x)[:, 0:1] * degpar
+    _x = torch.cat([x, degpar], dim=1)
+    return self.body(_x)

-class SRMD(SuperResolution):
+
+class SRMD(PerceptualOptimizer):
   def __init__(self, scale, channel, degradation=None, **kwargs):
-    super(SRMD, self).__init__(scale, channel)
-    self.srmd = ops.Net(scale=scale, channels=channel, **kwargs)
-    self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4)
     degradation = degradation or {}
     noise = degradation.get('noise', 0)
     if noise > 1:
       noise /= 255
     assert 0 <= noise <= 1
-    self.pca_dim = kwargs.get('pca_dim', pca._PCA.shape[0])
+    self.pca_dim = kwargs.get('pca_dim', 15)
     self.kernel_size = degradation.get('kernel_size', 15)
     self.ktype = degradation.get('kernel_type', 'isotropic')
     self.l1 = degradation.get('l1', 0.1)
     self.l2 = degradation.get('l2', 0.1)
     self.theta = degradation.get('theta', 0.1)
     self.noise = noise
+    self.blur_padding = torch.nn.ReflectionPad2d(7)
+    self.srmd = Net(scale=scale, channels=channel, **kwargs)
+    super(SRMD, self).__init__(scale, channel)

   def gen_kernel(self, ktype, ksize, l1, l2=None, theta=0):
     if ktype == 'isotropic':
@@ -51,60 +84,38 @@ def gen_random_kernel(self):

   def gen_random_noise(self, shape):
     stddev = np.random.uniform(0, self.noise, size=[shape[0]])
-    noise = np.random.normal(size=shape) * stddev
+    noise = np.random.normal(size=shape) * stddev.reshape([-1, 1, 1, 1])
     return noise, stddev

-  def train(self, inputs, labels, learning_rate=None):
-    for opt in self.opts.values():
-      if learning_rate:
-        for param_group in opt.param_groups:
-          param_group["lr"] = learning_rate
-    lr = inputs[0]
+  def fn(self, lr):
     batch = lr.shape[0]
-    noise, stddev = self.gen_random_noise(lr.shape)
-    kernel = [self.gen_random_kernel() for _ in range(batch)]
-    degpar = torch.tensor([pca.get_degradation(k) for k in kernel],
-                          dtype=lr.dtype, device=lr.device)
-    kernel = torch.tensor(kernel, dtype=lr.dtype, device=lr.device)
-    noise = torch.tensor(noise, dtype=lr.dtype, device=lr.device)
-    stddev = torch.tensor(stddev, dtype=lr.dtype, device=lr.device)
-    lr = imfilter(lr, kernel) + noise
-    sr = self.srmd(lr, degpar, stddev)
-    loss = F.l1_loss(sr, labels[0])
-    self.opt.zero_grad()
-    loss.backward()
-    self.opt.step()
-    return {
-      'loss': loss.detach().cpu().numpy()
-    }
-
-  def eval(self, inputs, labels=None, **kwargs):
-    metrics = {}
-    lr = inputs[0]
-    batch = lr.shape[0]
-    degpar = torch.tensor(
-      [
-        pca.get_degradation(self.gen_kernel(self.ktype,
-                                            self.kernel_size,
-                                            self.l1,
-                                            self.l2,
-                                            self.theta))
-      ] * batch,
-      dtype=lr.dtype,
-      device=lr.device)
-    stddev = torch.tensor(
-      [self.noise] * batch,
-      dtype=lr.dtype,
-      device=lr.device)
-    sr = self.srmd(lr, degpar, stddev).detach().cpu()
-    if labels is not
None:
-      metrics['psnr'] = psnr(sr, labels[0])
-      writer = get_writer(self.name)
-      if writer is not None:
-        step = kwargs.get('epoch', 0)
-        writer.image('gt', labels[0], step=step)
-        writer.image('clean', sr.clamp(0, 1), step=step)
-    return [sr.numpy()], metrics
+    if self.srmd.training:
+      noise, stddev = self.gen_random_noise(lr.shape)
+      kernel = [self.gen_random_kernel() for _ in range(batch)]
+      degpar = torch.tensor([get_degradation(k) for k in kernel],
+                            dtype=lr.dtype, device=lr.device)
+      kernel = torch.tensor(kernel, dtype=lr.dtype, device=lr.device)
+      noise = torch.tensor(noise, dtype=lr.dtype, device=lr.device)
+      stddev = torch.tensor(stddev, dtype=lr.dtype, device=lr.device)
+      lr = imfilter(lr, kernel, self.blur_padding) + noise
+      sr = self.srmd(lr, degpar, stddev)
+    else:
+      degpar = torch.tensor(
+        [
+          get_degradation(self.gen_kernel(self.ktype,
+                                          self.kernel_size,
+                                          self.l1,
+                                          self.l2,
+                                          self.theta))
+        ] * batch,
+        dtype=lr.dtype,
+        device=lr.device)
+      stddev = torch.tensor(
+        [self.noise] * batch,
+        dtype=lr.dtype,
+        device=lr.device)
+      sr = self.srmd(lr, degpar, stddev)
+    return sr

   def export(self, export_dir):
     """An example of how to export to ONNX format"""
diff --git a/VSR/Backend/Torch/Models/TecoGAN.py b/VSR/Backend/Torch/Models/TecoGAN.py
index 88762f7..bb0603e 100644
--- a/VSR/Backend/Torch/Models/TecoGAN.py
+++ b/VSR/Backend/Torch/Models/TecoGAN.py
@@ -3,30 +3,92 @@
 # Email: wenyi.tang@intel.com
 # Update Date: 2019/5/7 5:21 PM

+import logging
+
 import numpy as np
 import torch
 import torch.nn.functional as F
 from torch import nn

-from .Arch import SpaceToDepth
-from .Loss import VggFeatureLoss, gan_bce_loss, ragan_bce_loss
 from .Model import SuperResolution
-from .frvsr.ops import FNet
-from .teco.ops import TecoDiscriminator, TecoGenerator
-from .video.motion import STN
+from .Ops.Blocks import EasyConv2d, RB
+from .Ops.Loss import VggFeatureLoss, ragan_bce_loss
+from .Ops.Motion import Flownet, STN
+from .Ops.Scale import SpaceToDepth, Upsample
 from ..Framework.Summary import get_writer
 from ..Util import Metrics
 from ..Util.Utility import pad_if_divide, upsample

+_logger = logging.getLogger("VSR.TecoGAN")
+_logger.info("LICENSE: TecoGAN is implemented by Mengyu Chu, et al. "
+             "@rachelchu https://github.com/rachelchu/TecoGAN")
+_logger.warning("Training of TecoGAN has not been verified yet!")
+
+
+class TecoGenerator(nn.Module):
+  """Generator in TecoGAN.
+
+  Note: the flow estimation net `Fnet` is shared with FRVSR.
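+  The generator input is the current LR frame concatenated with the
+  space-to-depth folding of the warped previous SR frame, hence the entry
+  convolution takes `channel * (1 + scale ** 2)` channels.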
+
+  Args:
+    filters: basic filter numbers [default: 64]
+    num_rb: number of residual blocks [default: 16]
+  """
+
+  def __init__(self, channel, scale, filters, num_rb):
+    super(TecoGenerator, self).__init__()
+    rbs = []
+    for i in range(num_rb):
+      rbs.append(RB(filters, filters, 3, 'relu'))
+    self.body = nn.Sequential(
+      EasyConv2d(channel * (1 + scale ** 2), filters, 3, activation='relu'),
+      *rbs,
+      Upsample(filters, scale, 'nearest', activation='relu'),
+      EasyConv2d(filters, channel, 3))
+
+  def forward(self, x, prev, residual=None):
+    """`residual` is the bicubically upsampled HR image"""
+    sr = self.body(torch.cat((x, prev), dim=1))
+    if residual is not None:
+      sr += residual
+    return sr
+
+
+class TecoDiscriminator(nn.Module):
+  def __init__(self, channel, filters, patch_size):
+    super(TecoDiscriminator, self).__init__()
+    f = filters
+    self.conv0 = EasyConv2d(channel * 6, f, 3, activation='leaky')
+    self.conv1 = EasyConv2d(f, f, 4, 2, activation='leaky', use_bn=True)
+    self.conv2 = EasyConv2d(f, f, 4, 2, activation='leaky', use_bn=True)
+    self.conv3 = EasyConv2d(f, f * 2, 4, 2, activation='leaky', use_bn=True)
+    self.conv4 = EasyConv2d(f * 2, f * 4, 4, 2, activation='leaky', use_bn=True)
+    # self.pool = nn.AdaptiveAvgPool2d(1)
+    self.linear = nn.Linear(f * 4 * (patch_size // 16) ** 2, 1)
+
+  def forward(self, x):
+    """The input `x` is the concatenation of 8 tensors.
+    Note that the duplicated gt/yt from the paper is removed (9 - 1 = 8).
+    """
+    l0 = self.conv0(x)
+    l1 = self.conv1(l0)
+    l2 = self.conv2(l1)
+    l3 = self.conv3(l2)
+    l4 = self.conv4(l3)
+    # y = self.pool(l4)
+    y = self.linear(l4.flatten(1))
+    return y, (l1, l2, l3, l4)
+

 class Composer(nn.Module):
   def __init__(self, scale, channel, gain=24, filters=64, n_rb=16):
     super(Composer, self).__init__()
-    self.fnet = FNet(channel, gain=gain)
+    self.fnet = Flownet(channel)
     self.gnet = TecoGenerator(channel, scale, filters, n_rb)
     self.warpper = STN(padding_mode='border')
     self.spd = SpaceToDepth(scale)
     self.scale = scale
+    self.gain = gain

@@ -36,7 +98,7 @@ def forward(self, lr, lr_pre, sr_pre, detach_fnet=None):
     """
     sr_pre: t_0 sr frame
     detach_fnet: detach BP to fnet
     """
-    flow = self.fnet(lr, lr_pre)
+    flow = self.fnet(lr, lr_pre, gain=self.gain)
     flow_up = self.scale * upsample(flow, self.scale)
     u, v = [x.squeeze(1) for x in flow_up.split(1, dim=1)]
     sr_warp = self.warpper(sr_pre, u, v)
diff --git a/VSR/Backend/Torch/Models/Vespcn.py b/VSR/Backend/Torch/Models/Vespcn.py
index 83d3af2..2473962 100644
--- a/VSR/Backend/Torch/Models/Vespcn.py
+++ b/VSR/Backend/Torch/Models/Vespcn.py
@@ -3,20 +3,100 @@
 # Email: wenyi.tang@intel.com
 # Update Date: 2019/4/3 5:10 PM

+import logging
+
 import torch
+import torch.nn as nn
 import torch.nn.functional as F

 from .Model import SuperResolution
-from .vespcn import ops
+from .Ops.Blocks import EasyConv2d
+from .Ops.Motion import CoarseFineFlownet, STN
 from ..Framework.Summary import get_writer
 from ..Util import Metrics
 from ..Util.Utility import pad_if_divide

+_logger = logging.getLogger("VSR.VESPCN")
+_logger.info("LICENSE: VESPCN is proposed at CVPR2017 by Twitter. "
+             "Implemented by @LoSealL.")
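+
+# The classes below replace the former `vespcn.ops` sub-package in-tree:
+# `MotionCompensation` warps each neighbor frame onto the center frame with
+# a coarse-to-fine flow net, and `SRNet` (ESPCN-style, with pixel-shuffle
+# upsampling) super-resolves the stack of warped frames.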
" + "Implemented by myself @LoSealL.") + + +class ReluRB(nn.Module): + def __init__(self, inchannels, outchannels): + super(ReluRB, self).__init__() + self.conv1 = nn.Conv2d(inchannels, 64, 3, 1, 1) + self.conv2 = nn.Conv2d(64, outchannels, 3, 1, 1) + + def forward(self, inputs): + x = F.relu(inputs) + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + return x + inputs + + +class MotionCompensation(nn.Module): + def __init__(self, channel, gain=32): + super(MotionCompensation, self).__init__() + self.gain = gain + self.flownet = CoarseFineFlownet(channel) + self.warp_f = STN(padding_mode='border') + + def forward(self, target, ref): + flow = self.flownet(target, ref, self.gain) + warping = self.warp_f(ref, flow[:, 0], flow[:, 1]) + return warping, flow + + +class SRNet(nn.Module): + def __init__(self, scale, channel, depth): + super(SRNet, self).__init__() + self.entry = EasyConv2d(channel * depth, 64, 3) + self.exit = EasyConv2d(64, channel, 3) + self.body = nn.Sequential( + ReluRB(64, 64), + ReluRB(64, 64), + ReluRB(64, 64), + nn.ReLU(True)) + self.conv = EasyConv2d(64, 64 * scale ** 2, 3) + self.up = nn.PixelShuffle(scale) + + def forward(self, inputs): + x = self.entry(inputs) + y = self.body(x) + x + y = self.conv(y) + y = self.up(y) + y = self.exit(y) + return y + + +class Vespcn(nn.Module): + def __init__(self, scale, channel, depth): + super(Vespcn, self).__init__() + self.sr = SRNet(scale, channel, depth) + self.mc = MotionCompensation(channel) + self.depth = depth + + def forward(self, *inputs): + center = self.depth // 2 + target = inputs[center] + refs = inputs[:center] + inputs[center + 1:] + warps = [] + flows = [] + for r in refs: + warp, flow = self.mc(target, r) + warps.append(warp) + flows.append(flow) + warps.append(target) + x = torch.cat(warps, 1) + sr = self.sr(x) + return sr, warps[:-1], flows + class VESPCN(SuperResolution): def __init__(self, scale, channel, depth=3, **kwargs): super(VESPCN, self).__init__(scale, channel, **kwargs) - self.vespcn = ops.VESPCN(scale, channel, depth) + self.vespcn = Vespcn(scale, channel, depth) self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) self.depth = depth diff --git a/VSR/Backend/Torch/Models/carn/__init__.py b/VSR/Backend/Torch/Models/carn/__init__.py deleted file mode 100644 index 9311587..0000000 --- a/VSR/Backend/Torch/Models/carn/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019 - 3 - 14 - -import logging -_logger = logging.getLogger("VSR.CARN") -_logger.info("LICENSE: CARN is implemented by Namhyuk Ahn. " - "@nmhkahn https://github.com/nmhkahn/CARN-pytorch") diff --git a/VSR/Backend/Torch/Models/carn/carn.py b/VSR/Backend/Torch/Models/carn/carn.py deleted file mode 100644 index 11cd8ec..0000000 --- a/VSR/Backend/Torch/Models/carn/carn.py +++ /dev/null @@ -1,85 +0,0 @@ -import torch -import torch.nn as nn - -from . 
import ops - - -class Block(nn.Module): - def __init__(self, - in_channels, out_channels, - group=1): - super(Block, self).__init__() - - self.b1 = ops.ResidualBlock(64, 64) - self.b2 = ops.ResidualBlock(64, 64) - self.b3 = ops.ResidualBlock(64, 64) - self.c1 = ops.BasicBlock(64 * 2, 64, 1, 1, 0) - self.c2 = ops.BasicBlock(64 * 3, 64, 1, 1, 0) - self.c3 = ops.BasicBlock(64 * 4, 64, 1, 1, 0) - - def forward(self, x): - c0 = o0 = x - - b1 = self.b1(o0) - c1 = torch.cat([c0, b1], dim=1) - o1 = self.c1(c1) - - b2 = self.b2(o1) - c2 = torch.cat([c1, b2], dim=1) - o2 = self.c2(c2) - - b3 = self.b3(o2) - c3 = torch.cat([c2, b3], dim=1) - o3 = self.c3(c3) - - return o3 - - -class Net(nn.Module): - def __init__(self, **kwargs): - super(Net, self).__init__() - - scale = kwargs.get("scale") - multi_scale = kwargs.get("multi_scale") - group = kwargs.get("group", 1) - - self.sub_mean = ops.MeanShift((0.4488, 0.4371, 0.4040), sub=True) - self.add_mean = ops.MeanShift((0.4488, 0.4371, 0.4040), sub=False) - - self.entry = nn.Conv2d(3, 64, 3, 1, 1) - - self.b1 = Block(64, 64) - self.b2 = Block(64, 64) - self.b3 = Block(64, 64) - self.c1 = ops.BasicBlock(64 * 2, 64, 1, 1, 0) - self.c2 = ops.BasicBlock(64 * 3, 64, 1, 1, 0) - self.c3 = ops.BasicBlock(64 * 4, 64, 1, 1, 0) - - self.upsample = ops.UpsampleBlock(64, scale=scale, - multi_scale=multi_scale, - group=group) - self.exit = nn.Conv2d(64, 3, 3, 1, 1) - - def forward(self, x, scale=None): - x = self.sub_mean(x) - x = self.entry(x) - c0 = o0 = x - - b1 = self.b1(o0) - c1 = torch.cat([c0, b1], dim=1) - o1 = self.c1(c1) - - b2 = self.b2(o1) - c2 = torch.cat([c1, b2], dim=1) - o2 = self.c2(c2) - - b3 = self.b3(o2) - c3 = torch.cat([c2, b3], dim=1) - o3 = self.c3(c3) - - out = self.upsample(o3, scale=scale) - - out = self.exit(out) - out = self.add_mean(out) - - return out diff --git a/VSR/Backend/Torch/Models/carn/carn_m.py b/VSR/Backend/Torch/Models/carn/carn_m.py deleted file mode 100644 index 646ee7c..0000000 --- a/VSR/Backend/Torch/Models/carn/carn_m.py +++ /dev/null @@ -1,83 +0,0 @@ -import torch -import torch.nn as nn - -from . 
import ops - - -class Block(nn.Module): - def __init__(self, - in_channels, out_channels, - group=1): - super(Block, self).__init__() - - self.b1 = ops.EResidualBlock(64, 64, group=group) - self.c1 = ops.BasicBlock(64 * 2, 64, 1, 1, 0) - self.c2 = ops.BasicBlock(64 * 3, 64, 1, 1, 0) - self.c3 = ops.BasicBlock(64 * 4, 64, 1, 1, 0) - - def forward(self, x): - c0 = o0 = x - - b1 = self.b1(o0) - c1 = torch.cat([c0, b1], dim=1) - o1 = self.c1(c1) - - b2 = self.b1(o1) - c2 = torch.cat([c1, b2], dim=1) - o2 = self.c2(c2) - - b3 = self.b1(o2) - c3 = torch.cat([c2, b3], dim=1) - o3 = self.c3(c3) - - return o3 - - -class Net(nn.Module): - def __init__(self, **kwargs): - super(Net, self).__init__() - - scale = kwargs.get("scale") - multi_scale = kwargs.get("multi_scale") - group = kwargs.get("group", 1) - - self.sub_mean = ops.MeanShift((0.4488, 0.4371, 0.4040), sub=True) - self.add_mean = ops.MeanShift((0.4488, 0.4371, 0.4040), sub=False) - - self.entry = nn.Conv2d(3, 64, 3, 1, 1) - - self.b1 = Block(64, 64, group=group) - self.b2 = Block(64, 64, group=group) - self.b3 = Block(64, 64, group=group) - self.c1 = ops.BasicBlock(64 * 2, 64, 1, 1, 0) - self.c2 = ops.BasicBlock(64 * 3, 64, 1, 1, 0) - self.c3 = ops.BasicBlock(64 * 4, 64, 1, 1, 0) - - self.upsample = ops.UpsampleBlock(64, scale=scale, - multi_scale=multi_scale, - group=group) - self.exit = nn.Conv2d(64, 3, 3, 1, 1) - - def forward(self, x, scale): - x = self.sub_mean(x) - x = self.entry(x) - c0 = o0 = x - - b1 = self.b1(o0) - c1 = torch.cat([c0, b1], dim=1) - o1 = self.c1(c1) - - b2 = self.b2(o1) - c2 = torch.cat([c1, b2], dim=1) - o2 = self.c2(c2) - - b3 = self.b3(o2) - c3 = torch.cat([c2, b3], dim=1) - o3 = self.c3(c3) - - out = self.upsample(o3, scale=scale) - - out = self.exit(out) - out = self.add_mean(out) - - return out diff --git a/VSR/Backend/Torch/Models/carn/ops.py b/VSR/Backend/Torch/Models/carn/ops.py deleted file mode 100644 index 70e817f..0000000 --- a/VSR/Backend/Torch/Models/carn/ops.py +++ /dev/null @@ -1,143 +0,0 @@ -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -def init_weights(modules): - pass - - -class MeanShift(nn.Module): - def __init__(self, mean_rgb, sub): - super(MeanShift, self).__init__() - - sign = -1 if sub else 1 - r = mean_rgb[0] * sign - g = mean_rgb[1] * sign - b = mean_rgb[2] * sign - - self.shifter = nn.Conv2d(3, 3, 1, 1, 0) - self.shifter.weight.data = torch.eye(3).view(3, 3, 1, 1) - self.shifter.bias.data = torch.Tensor([r, g, b]) - - # Freeze the mean shift layer - for params in self.shifter.parameters(): - params.requires_grad = False - - def forward(self, x): - x = self.shifter(x) - return x - - -class BasicBlock(nn.Module): - def __init__(self, - in_channels, out_channels, - ksize=3, stride=1, pad=1): - super(BasicBlock, self).__init__() - - self.body = nn.Sequential( - nn.Conv2d(in_channels, out_channels, ksize, stride, pad), - nn.ReLU(inplace=True) - ) - - init_weights(self.modules) - - def forward(self, x): - out = self.body(x) - return out - - -class ResidualBlock(nn.Module): - def __init__(self, - in_channels, out_channels): - super(ResidualBlock, self).__init__() - - self.body = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, 1, 1), - nn.ReLU(inplace=True), - nn.Conv2d(out_channels, out_channels, 3, 1, 1), - ) - - init_weights(self.modules) - - def forward(self, x): - out = self.body(x) - out = F.relu(out + x) - return out - - -class EResidualBlock(nn.Module): - def __init__(self, - in_channels, out_channels, - group=1): - 
super(EResidualBlock, self).__init__() - - self.body = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, 1, 1, groups=group), - nn.ReLU(inplace=True), - nn.Conv2d(out_channels, out_channels, 3, 1, 1, groups=group), - nn.ReLU(inplace=True), - nn.Conv2d(out_channels, out_channels, 1, 1, 0), - ) - - init_weights(self.modules) - - def forward(self, x): - out = self.body(x) - out = F.relu(out + x) - return out - - -class UpsampleBlock(nn.Module): - def __init__(self, - n_channels, scale, multi_scale, - group=1): - super(UpsampleBlock, self).__init__() - - if multi_scale: - self.up2 = _UpsampleBlock(n_channels, scale=2, group=group) - self.up3 = _UpsampleBlock(n_channels, scale=3, group=group) - self.up4 = _UpsampleBlock(n_channels, scale=4, group=group) - else: - self.up = _UpsampleBlock(n_channels, scale=scale, group=group) - - self.multi_scale = multi_scale - - def forward(self, x, scale=None): - if self.multi_scale: - if scale == 2: - return self.up2(x) - elif scale == 3: - return self.up3(x) - elif scale == 4: - return self.up4(x) - else: - return self.up(x) - - -class _UpsampleBlock(nn.Module): - def __init__(self, - n_channels, scale, - group=1): - super(_UpsampleBlock, self).__init__() - - modules = [] - if scale == 2 or scale == 4 or scale == 8: - for _ in range(int(math.log(scale, 2))): - modules += [ - nn.Conv2d(n_channels, 4 * n_channels, 3, 1, 1, groups=group), - nn.ReLU(inplace=True)] - modules += [nn.PixelShuffle(2)] - elif scale == 3: - modules += [nn.Conv2d(n_channels, 9 * n_channels, 3, 1, 1, groups=group), - nn.ReLU(inplace=True)] - modules += [nn.PixelShuffle(3)] - - self.body = nn.Sequential(*modules) - init_weights(self.modules) - - def forward(self, x): - out = self.body(x) - return out diff --git a/VSR/Backend/Torch/Models/dbpn/__init__.py b/VSR/Backend/Torch/Models/dbpn/__init__.py deleted file mode 100644 index 18aa8e9..0000000 --- a/VSR/Backend/Torch/Models/dbpn/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019 - 3 - 15 - -import logging -_logger = logging.getLogger("VSR.DBPN") -_logger.info("LICENSE: DBPN is implemented by Haris. 
" - "@alterzero https://github.com/alterzero/DBPN-Pytorch") diff --git a/VSR/Backend/Torch/Models/dbpn/base_networks.py b/VSR/Backend/Torch/Models/dbpn/base_networks.py deleted file mode 100644 index 9524b1b..0000000 --- a/VSR/Backend/Torch/Models/dbpn/base_networks.py +++ /dev/null @@ -1,438 +0,0 @@ -import torch -import math - - -class DenseBlock(torch.nn.Module): - def __init__(self, input_size, output_size, bias=True, activation='relu', - norm='batch'): - super(DenseBlock, self).__init__() - self.fc = torch.nn.Linear(input_size, output_size, bias=bias) - - self.norm = norm - if self.norm == 'batch': - self.bn = torch.nn.BatchNorm1d(output_size) - elif self.norm == 'instance': - self.bn = torch.nn.InstanceNorm1d(output_size) - - self.activation = activation - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - if self.norm is not None: - out = self.bn(self.fc(x)) - else: - out = self.fc(x) - - if self.activation is not None: - return self.act(out) - else: - return out - - -class ConvBlock(torch.nn.Module): - def __init__(self, input_size, output_size, kernel_size=3, stride=1, - padding=1, bias=True, activation='prelu', norm=None): - super(ConvBlock, self).__init__() - self.conv = torch.nn.Conv2d(input_size, output_size, kernel_size, stride, - padding, bias=bias) - - self.norm = norm - if self.norm == 'batch': - self.bn = torch.nn.BatchNorm2d(output_size) - elif self.norm == 'instance': - self.bn = torch.nn.InstanceNorm2d(output_size) - - self.activation = activation - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - if self.norm is not None: - out = self.bn(self.conv(x)) - else: - out = self.conv(x) - - if self.activation is not None: - return self.act(out) - else: - return out - - -class DeconvBlock(torch.nn.Module): - def __init__(self, input_size, output_size, kernel_size=4, stride=2, - padding=1, bias=True, activation='prelu', norm=None): - super(DeconvBlock, self).__init__() - self.deconv = torch.nn.ConvTranspose2d(input_size, output_size, kernel_size, - stride, padding, bias=bias) - - self.norm = norm - if self.norm == 'batch': - self.bn = torch.nn.BatchNorm2d(output_size) - elif self.norm == 'instance': - self.bn = torch.nn.InstanceNorm2d(output_size) - - self.activation = activation - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - if self.norm is not None: - out = self.bn(self.deconv(x)) - else: - out = self.deconv(x) - - if self.activation is not None: - return self.act(out) - else: - return out - - -class ResnetBlock(torch.nn.Module): - def __init__(self, num_filter, kernel_size=3, stride=1, padding=1, bias=True, - activation='prelu', 
norm='batch'): - super(ResnetBlock, self).__init__() - self.conv1 = torch.nn.Conv2d(num_filter, num_filter, kernel_size, stride, - padding, bias=bias) - self.conv2 = torch.nn.Conv2d(num_filter, num_filter, kernel_size, stride, - padding, bias=bias) - - self.norm = norm - if self.norm == 'batch': - self.bn = torch.nn.BatchNorm2d(num_filter) - elif norm == 'instance': - self.bn = torch.nn.InstanceNorm2d(num_filter) - - self.activation = activation - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - residual = x - if self.norm is not None: - out = self.bn(self.conv1(x)) - else: - out = self.conv1(x) - - if self.activation is not None: - out = self.act(out) - - if self.norm is not None: - out = self.bn(self.conv2(out)) - else: - out = self.conv2(out) - - out = torch.add(out, residual) - return out - - -class UpBlock(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, bias=True, - activation='prelu', norm=None): - super(UpBlock, self).__init__() - self.up_conv1 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv2 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv3 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - h0 = self.up_conv1(x) - l0 = self.up_conv2(h0) - h1 = self.up_conv3(l0 - x) - return h1 + h0 - - -class UpBlockPix(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, scale=4, - bias=True, activation='prelu', norm=None): - super(UpBlockPix, self).__init__() - self.up_conv1 = Upsampler(scale, num_filter) - self.up_conv2 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv3 = Upsampler(scale, num_filter) - - def forward(self, x): - h0 = self.up_conv1(x) - l0 = self.up_conv2(h0) - h1 = self.up_conv3(l0 - x) - return h1 + h0 - - -class D_UpBlock(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, - num_stages=1, bias=True, activation='prelu', norm=None): - super(D_UpBlock, self).__init__() - self.conv = ConvBlock(num_filter * num_stages, num_filter, 1, 1, 0, - activation, norm=None) - self.up_conv1 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv2 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv3 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - x = self.conv(x) - h0 = self.up_conv1(x) - l0 = self.up_conv2(h0) - h1 = self.up_conv3(l0 - x) - return h1 + h0 - - -class D_UpBlockPix(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, - num_stages=1, scale=4, bias=True, activation='prelu', norm=None): - super(D_UpBlockPix, self).__init__() - self.conv = ConvBlock(num_filter * num_stages, num_filter, 1, 1, 0, - activation, norm=None) - self.up_conv1 = Upsampler(scale, num_filter) - self.up_conv2 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv3 = Upsampler(scale, num_filter) - - 
def forward(self, x): - x = self.conv(x) - h0 = self.up_conv1(x) - l0 = self.up_conv2(h0) - h1 = self.up_conv3(l0 - x) - return h1 + h0 - - -class DownBlock(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, bias=True, - activation='prelu', norm=None): - super(DownBlock, self).__init__() - self.down_conv1 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv2 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv3 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - l0 = self.down_conv1(x) - h0 = self.down_conv2(l0) - l1 = self.down_conv3(h0 - x) - return l1 + l0 - - -class DownBlockPix(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, scale=4, - bias=True, activation='prelu', norm=None): - super(DownBlockPix, self).__init__() - self.down_conv1 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv2 = Upsampler(scale, num_filter) - self.down_conv3 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - l0 = self.down_conv1(x) - h0 = self.down_conv2(l0) - l1 = self.down_conv3(h0 - x) - return l1 + l0 - - -class D_DownBlock(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, - num_stages=1, bias=True, activation='prelu', norm=None): - super(D_DownBlock, self).__init__() - self.conv = ConvBlock(num_filter * num_stages, num_filter, 1, 1, 0, - activation, norm=None) - self.down_conv1 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv2 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv3 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - x = self.conv(x) - l0 = self.down_conv1(x) - h0 = self.down_conv2(l0) - l1 = self.down_conv3(h0 - x) - return l1 + l0 - - -class D_DownBlockPix(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, - num_stages=1, scale=4, bias=True, activation='prelu', norm=None): - super(D_DownBlockPix, self).__init__() - self.conv = ConvBlock(num_filter * num_stages, num_filter, 1, 1, 0, - activation, norm=None) - self.down_conv1 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv2 = Upsampler(scale, num_filter) - self.down_conv3 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - x = self.conv(x) - l0 = self.down_conv1(x) - h0 = self.down_conv2(l0) - l1 = self.down_conv3(h0 - x) - return l1 + l0 - - -class PSBlock(torch.nn.Module): - def __init__(self, input_size, output_size, scale_factor, kernel_size=3, - stride=1, padding=1, bias=True, activation='prelu', - norm='batch'): - super(PSBlock, self).__init__() - self.conv = torch.nn.Conv2d(input_size, output_size * scale_factor ** 2, - kernel_size, stride, padding, bias=bias) - self.ps = torch.nn.PixelShuffle(scale_factor) - - self.norm = norm - if self.norm == 'batch': - self.bn = torch.nn.BatchNorm2d(output_size) - elif norm == 'instance': - self.bn = torch.nn.InstanceNorm2d(output_size) - - self.activation = activation - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif 
self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - if self.norm is not None: - out = self.bn(self.ps(self.conv(x))) - else: - out = self.ps(self.conv(x)) - - if self.activation is not None: - out = self.act(out) - return out - - -class Upsampler(torch.nn.Module): - def __init__(self, scale, n_feat, bn=False, act='prelu', bias=True): - super(Upsampler, self).__init__() - modules = [] - for _ in range(int(math.log(scale, 2))): - modules.append( - ConvBlock(n_feat, 4 * n_feat, 3, 1, 1, bias, activation=None, - norm=None)) - modules.append(torch.nn.PixelShuffle(2)) - if bn: modules.append(torch.nn.BatchNorm2d(n_feat)) - # modules.append(torch.nn.PReLU()) - self.up = torch.nn.Sequential(*modules) - - self.activation = act - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - out = self.up(x) - if self.activation is not None: - out = self.act(out) - return out - - -class Upsample2xBlock(torch.nn.Module): - def __init__(self, input_size, output_size, bias=True, upsample='deconv', - activation='relu', norm='batch'): - super(Upsample2xBlock, self).__init__() - scale_factor = 2 - # 1. Deconvolution (Transposed convolution) - if upsample == 'deconv': - self.upsample = DeconvBlock(input_size, output_size, - kernel_size=4, stride=2, padding=1, - bias=bias, activation=activation, norm=norm) - - # 2. Sub-pixel convolution (Pixel shuffler) - elif upsample == 'ps': - self.upsample = PSBlock(input_size, output_size, - scale_factor=scale_factor, - bias=bias, activation=activation, norm=norm) - - # 3. 
Resize and Convolution - elif upsample == 'rnc': - self.upsample = torch.nn.Sequential( - torch.nn.Upsample(scale_factor=scale_factor, mode='nearest'), - ConvBlock(input_size, output_size, - kernel_size=3, stride=1, padding=1, - bias=bias, activation=activation, norm=norm) - ) - - def forward(self, x): - out = self.upsample(x) - return out - - -class CascadedBlock(torch.nn.Module): - def __init__(self, padding=1, **kwargs): - super(CascadedBlock, self).__init__() - self.rb1 = ResnetBlock(64, padding=padding) - self.rb2 = ResnetBlock(64, padding=padding) - self.rb3 = ResnetBlock(64, padding=padding) - self.rb4 = ResnetBlock(64, padding=padding) - - self.cb1 = torch.nn.Conv2d(64 * 2, 64, 1, padding=0) - self.cb2 = torch.nn.Conv2d(64 * 3, 64, 1, padding=0) - self.cb3 = torch.nn.Conv2d(64 * 4, 64, 1, padding=0) - self.cb4 = torch.nn.Conv2d(64 * 5, 64, 1, padding=0) - - def forward(self, x): - x1 = self.rb1(x) - x1_c = torch.cat([x, x1], 1) - x1_s = self.cb1(x1_c) - x2 = self.rb2(x1_s) - x2_c = torch.cat([x, x1, x2], 1) - x2_s = self.cb2(x2_c) - x3 = self.rb3(x2_s) - x3_c = torch.cat([x, x1, x2, x3], 1) - x3_s = self.cb3(x3_c) - x4 = self.rb4(x3_s) - x4_c = torch.cat([x, x1, x2, x3, x4], 1) - x4_s = self.cb4(x4_c) - return x4_s diff --git a/VSR/Backend/Torch/Models/dbpn/dbpn.py b/VSR/Backend/Torch/Models/dbpn/dbpn.py deleted file mode 100644 index 81106b1..0000000 --- a/VSR/Backend/Torch/Models/dbpn/dbpn.py +++ /dev/null @@ -1,98 +0,0 @@ -import torch.nn as nn - -from .base_networks import * - - -class Net(nn.Module): - def __init__(self, num_channels, base_filter, feat, num_stages, scale_factor): - super(Net, self).__init__() - - if scale_factor == 2: - kernel = 6 - stride = 2 - padding = 2 - elif scale_factor == 4: - kernel = 8 - stride = 4 - padding = 2 - elif scale_factor == 8: - kernel = 12 - stride = 8 - padding = 2 - - # Initial Feature Extraction - self.feat0 = ConvBlock(num_channels, feat, 3, 1, 1, activation='prelu', - norm=None) - self.feat1 = ConvBlock(feat, base_filter, 1, 1, 0, activation='prelu', - norm=None) - # Back-projection stages - self.up1 = UpBlock(base_filter, kernel, stride, padding) - self.down1 = DownBlock(base_filter, kernel, stride, padding) - self.up2 = UpBlock(base_filter, kernel, stride, padding) - self.down2 = D_DownBlock(base_filter, kernel, stride, padding, 2) - self.up3 = D_UpBlock(base_filter, kernel, stride, padding, 2) - self.down3 = D_DownBlock(base_filter, kernel, stride, padding, 3) - self.up4 = D_UpBlock(base_filter, kernel, stride, padding, 3) - self.down4 = D_DownBlock(base_filter, kernel, stride, padding, 4) - self.up5 = D_UpBlock(base_filter, kernel, stride, padding, 4) - self.down5 = D_DownBlock(base_filter, kernel, stride, padding, 5) - self.up6 = D_UpBlock(base_filter, kernel, stride, padding, 5) - self.down6 = D_DownBlock(base_filter, kernel, stride, padding, 6) - self.up7 = D_UpBlock(base_filter, kernel, stride, padding, 6) - # Reconstruction - self.output_conv = ConvBlock(num_stages * base_filter, num_channels, 3, 1, - 1, activation=None, norm=None) - - for m in self.modules(): - classname = m.__class__.__name__ - if classname.find('Conv2d') != -1: - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - elif classname.find('ConvTranspose2d') != -1: - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - - def forward(self, x): - x = self.feat0(x) - x = self.feat1(x) - - h1 = self.up1(x) - l1 = self.down1(h1) - h2 = self.up2(l1) - - concat_h = torch.cat((h2, h1), 1) - l = 
self.down2(concat_h) - - concat_l = torch.cat((l, l1), 1) - h = self.up3(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - l = self.down3(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up4(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - l = self.down4(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up5(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - l = self.down5(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up6(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - l = self.down6(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up7(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - x = self.output_conv(concat_h) - - return x diff --git a/VSR/Backend/Torch/Models/dbpn/dbpn_v1.py b/VSR/Backend/Torch/Models/dbpn/dbpn_v1.py deleted file mode 100644 index 6d07cbe..0000000 --- a/VSR/Backend/Torch/Models/dbpn/dbpn_v1.py +++ /dev/null @@ -1,122 +0,0 @@ -import torch.nn as nn - -from .base_networks import * - - -class Net(nn.Module): - def __init__(self, num_channels, base_filter, feat, num_stages, scale_factor): - super(Net, self).__init__() - - if scale_factor == 2: - kernel = 6 - stride = 2 - padding = 2 - elif scale_factor == 4: - kernel = 8 - stride = 4 - padding = 2 - elif scale_factor == 8: - kernel = 12 - stride = 8 - padding = 2 - - # Initial Feature Extraction - self.feat0 = ConvBlock(num_channels, feat, 3, 1, 1, activation='prelu', - norm=None) - self.feat1 = ConvBlock(feat, base_filter, 1, 1, 0, activation='prelu', - norm=None) - # Back-projection stages - self.up1 = UpBlock(base_filter, kernel, stride, padding) - self.down1 = DownBlock(base_filter, kernel, stride, padding) - self.up2 = UpBlock(base_filter, kernel, stride, padding) - self.down2 = D_DownBlock(base_filter, kernel, stride, padding, 2) - self.up3 = D_UpBlock(base_filter, kernel, stride, padding, 2) - self.down3 = D_DownBlock(base_filter, kernel, stride, padding, 3) - self.up4 = D_UpBlock(base_filter, kernel, stride, padding, 3) - self.down4 = D_DownBlock(base_filter, kernel, stride, padding, 4) - self.up5 = D_UpBlock(base_filter, kernel, stride, padding, 4) - self.down5 = D_DownBlock(base_filter, kernel, stride, padding, 5) - self.up6 = D_UpBlock(base_filter, kernel, stride, padding, 5) - self.down6 = D_DownBlock(base_filter, kernel, stride, padding, 6) - self.up7 = D_UpBlock(base_filter, kernel, stride, padding, 6) - self.down7 = D_DownBlock(base_filter, kernel, stride, padding, 7) - self.up8 = D_UpBlock(base_filter, kernel, stride, padding, 7) - self.down8 = D_DownBlock(base_filter, kernel, stride, padding, 8) - self.up9 = D_UpBlock(base_filter, kernel, stride, padding, 8) - self.down9 = D_DownBlock(base_filter, kernel, stride, padding, 9) - self.up10 = D_UpBlock(base_filter, kernel, stride, padding, 9) - # Reconstruction - self.output_conv = ConvBlock(num_stages * base_filter, num_channels, 3, 1, - 1, activation=None, norm=None) - - for m in self.modules(): - classname = m.__class__.__name__ - if classname.find('Conv2d') != -1: - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - elif classname.find('ConvTranspose2d') != -1: - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - - def forward(self, x): - x = self.feat0(x) - x = self.feat1(x) - - h1 = self.up1(x) - l1 = self.down1(h1) - h2 = self.up2(l1) - - concat_h = torch.cat((h2, h1), 1) - l = self.down2(concat_h) - - concat_l = torch.cat((l, l1), 1) - h = self.up3(concat_l) - - concat_h = torch.cat((h, 
concat_h), 1) - l = self.down3(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up4(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - l = self.down4(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up5(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - l = self.down5(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up6(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - l = self.down6(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up7(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - l = self.down7(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up8(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - l = self.down8(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up9(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - l = self.down9(concat_h) - - concat_l = torch.cat((l, concat_l), 1) - h = self.up10(concat_l) - - concat_h = torch.cat((h, concat_h), 1) - x = self.output_conv(concat_h) - - return x diff --git a/VSR/Backend/Torch/Models/dbpn/dbpns.py b/VSR/Backend/Torch/Models/dbpn/dbpns.py deleted file mode 100644 index 5abd946..0000000 --- a/VSR/Backend/Torch/Models/dbpn/dbpns.py +++ /dev/null @@ -1,56 +0,0 @@ -import torch.nn as nn - -from .base_networks import * - - -class Net(nn.Module): - def __init__(self, num_channels, base_filter, feat, num_stages, scale_factor): - super(Net, self).__init__() - - if scale_factor == 2: - kernel = 6 - stride = 2 - padding = 2 - elif scale_factor == 4: - kernel = 8 - stride = 4 - padding = 2 - elif scale_factor == 8: - kernel = 12 - stride = 8 - padding = 2 - - # Initial Feature Extraction - self.feat0 = ConvBlock(num_channels, feat, 3, 1, 1, activation='prelu', - norm=None) - self.feat1 = ConvBlock(feat, base_filter, 1, 1, 0, activation='prelu', - norm=None) - # Back-projection stages - self.up1 = UpBlock(base_filter, kernel, stride, padding) - self.down1 = DownBlock(base_filter, kernel, stride, padding) - self.up2 = UpBlock(base_filter, kernel, stride, padding) - # Reconstruction - self.output_conv = ConvBlock(num_stages * base_filter, num_channels, 3, 1, - 1, activation=None, norm=None) - - for m in self.modules(): - classname = m.__class__.__name__ - if classname.find('Conv2d') != -1: - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - elif classname.find('ConvTranspose2d') != -1: - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - - def forward(self, x): - x = self.feat0(x) - x = self.feat1(x) - - h1 = self.up1(x) - h2 = self.up2(self.down1(h1)) - - x = self.output_conv(torch.cat((h2, h1), 1)) - - return x diff --git a/VSR/Backend/Torch/Models/edsr/__init__.py b/VSR/Backend/Torch/Models/edsr/__init__.py deleted file mode 100644 index 403b726..0000000 --- a/VSR/Backend/Torch/Models/edsr/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019 - 3 - 15 - -import logging -_logger = logging.getLogger("VSR.EDSR") -_logger.info("LICENSE: EDSR is implemented by Bee Lim. 
" - "@thstkdgus35 https://github.com/thstkdgus35/EDSR-PyTorch") diff --git a/VSR/Backend/Torch/Models/edsr/common.py b/VSR/Backend/Torch/Models/edsr/common.py deleted file mode 100644 index 427c69a..0000000 --- a/VSR/Backend/Torch/Models/edsr/common.py +++ /dev/null @@ -1,90 +0,0 @@ -import math - -import torch -import torch.nn as nn - - -def default_conv(in_channels, out_channels, kernel_size, bias=True): - return nn.Conv2d( - in_channels, out_channels, kernel_size, - padding=(kernel_size // 2), bias=bias) - - -class MeanShift(nn.Conv2d): - def __init__( - self, rgb_range, - rgb_mean=(0.4488, 0.4371, 0.4040), rgb_std=(1.0, 1.0, 1.0), sign=-1): - super(MeanShift, self).__init__(3, 3, kernel_size=1) - std = torch.Tensor(rgb_std) - self.weight.data = torch.eye(3).view(3, 3, 1, 1) / std.view(3, 1, 1, 1) - self.bias.data = sign * rgb_range * torch.Tensor(rgb_mean) / std - for p in self.parameters(): - p.requires_grad = False - - -class BasicBlock(nn.Sequential): - def __init__( - self, conv, in_channels, out_channels, kernel_size, stride=1, bias=False, - bn=True, act=nn.ReLU(True)): - - m = [conv(in_channels, out_channels, kernel_size, bias=bias)] - if bn: - m.append(nn.BatchNorm2d(out_channels)) - if act is not None: - m.append(act) - - super(BasicBlock, self).__init__(*m) - - -class ResBlock(nn.Module): - def __init__( - self, conv, n_feats, kernel_size, - bias=True, bn=False, act=nn.ReLU(True), res_scale=1): - - super(ResBlock, self).__init__() - m = [] - for i in range(2): - m.append(conv(n_feats, n_feats, kernel_size, bias=bias)) - if bn: - m.append(nn.BatchNorm2d(n_feats)) - if i == 0: - m.append(act) - - self.body = nn.Sequential(*m) - self.res_scale = res_scale - - def forward(self, x): - res = self.body(x).mul(self.res_scale) - res += x - - return res - - -class Upsampler(nn.Sequential): - def __init__(self, conv, scale, n_feats, bn=False, act=False, bias=True): - - m = [] - if (scale & (scale - 1)) == 0: # Is scale = 2^n? - for _ in range(int(math.log(scale, 2))): - m.append(conv(n_feats, 4 * n_feats, 3, bias)) - m.append(nn.PixelShuffle(2)) - if bn: - m.append(nn.BatchNorm2d(n_feats)) - if act == 'relu': - m.append(nn.ReLU(True)) - elif act == 'prelu': - m.append(nn.PReLU(n_feats)) - - elif scale == 3: - m.append(conv(n_feats, 9 * n_feats, 3, bias)) - m.append(nn.PixelShuffle(3)) - if bn: - m.append(nn.BatchNorm2d(n_feats)) - if act == 'relu': - m.append(nn.ReLU(True)) - elif act == 'prelu': - m.append(nn.PReLU(n_feats)) - else: - raise NotImplementedError - - super(Upsampler, self).__init__(*m) diff --git a/VSR/Backend/Torch/Models/edsr/edsr.py b/VSR/Backend/Torch/Models/edsr/edsr.py deleted file mode 100644 index 4a51beb..0000000 --- a/VSR/Backend/Torch/Models/edsr/edsr.py +++ /dev/null @@ -1,83 +0,0 @@ -import torch.nn as nn - -from . 
import common - -url = { - 'r16f64x2': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_baseline_x2-1bc95232.pt', - 'r16f64x3': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_baseline_x3-abf2a44e.pt', - 'r16f64x4': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_baseline_x4-6b446fab.pt', - 'r32f256x2': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_x2-0edfb8a3.pt', - 'r32f256x3': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_x3-ea3ef2c6.pt', - 'r32f256x4': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_x4-4f62e9ef.pt' -} - - -def make_model(args, parent=False): - return EDSR(args) - - -class EDSR(nn.Module): - def __init__(self, args, conv=common.default_conv): - super(EDSR, self).__init__() - - n_resblocks = args.n_resblocks - n_feats = args.n_feats - kernel_size = 3 - scale = args.scale[0] - act = nn.ReLU(True) - self.url = url['r{}f{}x{}'.format(n_resblocks, n_feats, scale)] - self.sub_mean = common.MeanShift(args.rgb_range) - self.add_mean = common.MeanShift(args.rgb_range, sign=1) - - # define head module - m_head = [conv(args.n_colors, n_feats, kernel_size)] - - # define body module - m_body = [ - common.ResBlock( - conv, n_feats, kernel_size, act=act, res_scale=args.res_scale - ) for _ in range(n_resblocks) - ] - m_body.append(conv(n_feats, n_feats, kernel_size)) - - # define tail module - m_tail = [ - common.Upsampler(conv, scale, n_feats, act=False), - conv(n_feats, args.n_colors, kernel_size) - ] - - self.head = nn.Sequential(*m_head) - self.body = nn.Sequential(*m_body) - self.tail = nn.Sequential(*m_tail) - - def forward(self, x): - x = self.sub_mean(x) - x = self.head(x) - - res = self.body(x) - res += x - - x = self.tail(res) - x = self.add_mean(x) - - return x - - def load_state_dict(self, state_dict, strict=True): - own_state = self.state_dict() - for name, param in state_dict.items(): - if name in own_state: - if isinstance(param, nn.Parameter): - param = param.data - try: - own_state[name].copy_(param) - except Exception: - if name.find('tail') == -1: - raise RuntimeError('While copying the parameter named {}, ' - 'whose dimensions in the model are {} and ' - 'whose dimensions in the checkpoint are {}.' - .format(name, own_state[name].size(), - param.size())) - elif strict: - if name.find('tail') == -1: - raise KeyError('unexpected key "{}" in state_dict' - .format(name)) diff --git a/VSR/Backend/Torch/Models/edsr/mdsr.py b/VSR/Backend/Torch/Models/edsr/mdsr.py deleted file mode 100644 index 3100913..0000000 --- a/VSR/Backend/Torch/Models/edsr/mdsr.py +++ /dev/null @@ -1,67 +0,0 @@ -import torch.nn as nn -from . 
import common - -url = { - 'r16f64': 'https://cv.snu.ac.kr/research/EDSR/models/mdsr_baseline-a00cab12.pt', - 'r80f64': 'https://cv.snu.ac.kr/research/EDSR/models/mdsr-4a78bedf.pt' -} - - -def make_model(args, parent=False): - return MDSR(args) - - -class MDSR(nn.Module): - def __init__(self, args, conv=common.default_conv): - super(MDSR, self).__init__() - n_resblocks = args.n_resblocks - n_feats = args.n_feats - kernel_size = 3 - act = nn.ReLU(True) - self.scale_idx = 0 - self.url = url['r{}f{}'.format(n_resblocks, n_feats)] - self.sub_mean = common.MeanShift(args.rgb_range) - self.add_mean = common.MeanShift(args.rgb_range, sign=1) - - m_head = [conv(args.n_colors, n_feats, kernel_size)] - - self.pre_process = nn.ModuleList([ - nn.Sequential( - common.ResBlock(conv, n_feats, 5, act=act), - common.ResBlock(conv, n_feats, 5, act=act) - ) for _ in args.scale - ]) - - m_body = [ - common.ResBlock( - conv, n_feats, kernel_size, act=act - ) for _ in range(n_resblocks) - ] - m_body.append(conv(n_feats, n_feats, kernel_size)) - - self.upsample = nn.ModuleList([ - common.Upsampler(conv, s, n_feats, act=False) for s in args.scale - ]) - - m_tail = [conv(n_feats, args.n_colors, kernel_size)] - - self.head = nn.Sequential(*m_head) - self.body = nn.Sequential(*m_body) - self.tail = nn.Sequential(*m_tail) - - def forward(self, x): - x = self.sub_mean(x) - x = self.head(x) - x = self.pre_process[self.scale_idx](x) - - res = self.body(x) - res += x - - x = self.upsample[self.scale_idx](res) - x = self.tail(x) - x = self.add_mean(x) - - return x - - def set_scale(self, scale_idx): - self.scale_idx = scale_idx diff --git a/VSR/Backend/Torch/Models/esrgan/__init__.py b/VSR/Backend/Torch/Models/esrgan/__init__.py deleted file mode 100644 index cd0b90d..0000000 --- a/VSR/Backend/Torch/Models/esrgan/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019 - 3 - 15 - -import logging -_logger = logging.getLogger("VSR.ESRGAN") -_logger.info("LICENSE: ESRGAN is implemented by Xintao Wang. " - "@xinntao https://github.com/xinntao/ESRGAN") diff --git a/VSR/Backend/Torch/Models/esrgan/architecture.py b/VSR/Backend/Torch/Models/esrgan/architecture.py deleted file mode 100644 index 3bd9e5b..0000000 --- a/VSR/Backend/Torch/Models/esrgan/architecture.py +++ /dev/null @@ -1,48 +0,0 @@ -import math - -import torch.nn as nn - -from . 
import block as B - - -class RRDB_Net(nn.Module): - def __init__(self, in_nc, out_nc, nf, nb, gc=32, upscale=4, norm_type=None, - act_type='leakyrelu', mode='CNA', res_scale=1, - upsample_mode='upconv'): - super(RRDB_Net, self).__init__() - n_upscale = int(math.log(upscale, 2)) - if upscale == 3: - n_upscale = 1 - - fea_conv = B.conv_block(in_nc, nf, kernel_size=3, norm_type=None, - act_type=None) - rb_blocks = [ - B.RRDB(nf, kernel_size=3, gc=32, stride=1, bias=True, pad_type='zero', - norm_type=norm_type, act_type=act_type, mode='CNA') for _ in - range(nb)] - LR_conv = B.conv_block(nf, nf, kernel_size=3, norm_type=norm_type, - act_type=None, mode=mode) - - if upsample_mode == 'upconv': - upsample_block = B.upconv_blcok - elif upsample_mode == 'pixelshuffle': - upsample_block = B.pixelshuffle_block - else: - raise NotImplementedError( - 'upsample mode [%s] is not found' % upsample_mode) - if upscale == 3: - upsampler = upsample_block(nf, nf, 3, act_type=act_type) - else: - upsampler = [upsample_block(nf, nf, act_type=act_type) for _ in - range(n_upscale)] - HR_conv0 = B.conv_block(nf, nf, kernel_size=3, norm_type=None, - act_type=act_type) - HR_conv1 = B.conv_block(nf, out_nc, kernel_size=3, norm_type=None, - act_type=None) - - self.model = B.sequential(fea_conv, B.ShortcutBlock( - B.sequential(*rb_blocks, LR_conv)), *upsampler, HR_conv0, HR_conv1) - - def forward(self, x): - x = self.model(x) - return x diff --git a/VSR/Backend/Torch/Models/esrgan/block.py b/VSR/Backend/Torch/Models/esrgan/block.py deleted file mode 100644 index 7eef0e9..0000000 --- a/VSR/Backend/Torch/Models/esrgan/block.py +++ /dev/null @@ -1,286 +0,0 @@ -from collections import OrderedDict - -import torch -import torch.nn as nn - - -#################### -# Basic blocks -#################### - - -def act(act_type, inplace=True, neg_slope=0.2, n_prelu=1): - # helper selecting activation - # neg_slope: for leakyrelu and init of prelu - # n_prelu: for p_relu num_parameters - act_type = act_type.lower() - if act_type == 'relu': - layer = nn.ReLU(inplace) - elif act_type == 'leakyrelu': - layer = nn.LeakyReLU(neg_slope, inplace) - elif act_type == 'prelu': - layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope) - else: - raise NotImplementedError('activation layer [%s] is not found' % act_type) - return layer - - -def norm(norm_type, nc): - # helper selecting normalization layer - norm_type = norm_type.lower() - if norm_type == 'batch': - layer = nn.BatchNorm2d(nc, affine=True) - elif norm_type == 'instance': - layer = nn.InstanceNorm2d(nc, affine=False) - else: - raise NotImplementedError( - 'normalization layer [%s] is not found' % norm_type) - return layer - - -def pad(pad_type, padding): - # helper selecting padding layer - # if padding is 'zero', do by conv layers - pad_type = pad_type.lower() - if padding == 0: - return None - if pad_type == 'reflect': - layer = nn.ReflectionPad2d(padding) - elif pad_type == 'replicate': - layer = nn.ReplicationPad2d(padding) - else: - raise NotImplementedError( - 'padding layer [%s] is not implemented' % pad_type) - return layer - - -def get_valid_padding(kernel_size, dilation): - kernel_size = kernel_size + (kernel_size - 1) * (dilation - 1) - padding = (kernel_size - 1) // 2 - return padding - - -class ConcatBlock(nn.Module): - # Concat the output of a submodule to its input - def __init__(self, submodule): - super(ConcatBlock, self).__init__() - self.sub = submodule - - def forward(self, x): - output = torch.cat((x, self.sub(x)), dim=1) - return output - - def __repr__(self): - 
tmpstr = 'Identity .. \n|' - modstr = self.sub.__repr__().replace('\n', '\n|') - tmpstr = tmpstr + modstr - return tmpstr - - -class ShortcutBlock(nn.Module): - # Elementwise sum the output of a submodule to its input - def __init__(self, submodule): - super(ShortcutBlock, self).__init__() - self.sub = submodule - - def forward(self, x): - output = x + self.sub(x) - return output - - def __repr__(self): - tmpstr = 'Identity + \n|' - modstr = self.sub.__repr__().replace('\n', '\n|') - tmpstr = tmpstr + modstr - return tmpstr - - -def sequential(*args): - # Flatten Sequential. It unwraps nn.Sequential. - if len(args) == 1: - if isinstance(args[0], OrderedDict): - raise NotImplementedError( - 'sequential does not support OrderedDict input.') - return args[0] # No sequential is needed. - modules = [] - for module in args: - if isinstance(module, nn.Sequential): - for submodule in module.children(): - modules.append(submodule) - elif isinstance(module, nn.Module): - modules.append(module) - return nn.Sequential(*modules) - - -def conv_block(in_nc, out_nc, kernel_size, stride=1, dilation=1, groups=1, - bias=True, - pad_type='zero', norm_type=None, act_type='relu', mode='CNA'): - """ - Conv layer with padding, normalization, activation - mode: CNA --> Conv -> Norm -> Act - NAC --> Norm -> Act --> Conv (Identity Mappings in Deep Residual Networks, ECCV16) - """ - assert mode in ['CNA', 'NAC', 'CNAC'], 'Wong conv mode [%s]' % mode - padding = get_valid_padding(kernel_size, dilation) - p = pad(pad_type, padding) if pad_type and pad_type != 'zero' else None - padding = padding if pad_type == 'zero' else 0 - - c = nn.Conv2d(in_nc, out_nc, kernel_size=kernel_size, stride=stride, - padding=padding, \ - dilation=dilation, bias=bias, groups=groups) - a = act(act_type) if act_type else None - if 'CNA' in mode: - n = norm(norm_type, out_nc) if norm_type else None - return sequential(p, c, n, a) - elif mode == 'NAC': - if norm_type is None and act_type is not None: - a = act(act_type, inplace=False) - # Important! 
- # input----ReLU(inplace)----Conv--+----output - # |________________________| - # inplace ReLU will modify the input, therefore wrong output - n = norm(norm_type, in_nc) if norm_type else None - return sequential(n, a, p, c) - - -#################### -# Useful blocks -#################### - - -class ResNetBlock(nn.Module): - """ - ResNet Block, 3-3 style - with extra residual scaling used in EDSR - (Enhanced Deep Residual Networks for Single Image Super-Resolution, CVPRW 17) - """ - - def __init__(self, in_nc, mid_nc, out_nc, kernel_size=3, stride=1, dilation=1, - groups=1, \ - bias=True, pad_type='zero', norm_type=None, act_type='relu', - mode='CNA', res_scale=1): - super(ResNetBlock, self).__init__() - conv0 = conv_block(in_nc, mid_nc, kernel_size, stride, dilation, groups, - bias, pad_type, \ - norm_type, act_type, mode) - if mode == 'CNA': - act_type = None - if mode == 'CNAC': # Residual path: |-CNAC-| - act_type = None - norm_type = None - conv1 = conv_block(mid_nc, out_nc, kernel_size, stride, dilation, groups, - bias, pad_type, \ - norm_type, act_type, mode) - # if in_nc != out_nc: - # self.project = conv_block(in_nc, out_nc, 1, stride, dilation, 1, bias, pad_type, \ - # None, None) - # print('Need a projecter in ResNetBlock.') - # else: - # self.project = lambda x:x - self.res = sequential(conv0, conv1) - self.res_scale = res_scale - - def forward(self, x): - res = self.res(x).mul(self.res_scale) - return x + res - - -class ResidualDenseBlock_5C(nn.Module): - """ - Residual Dense Block - style: 5 convs - The core module of paper: (Residual Dense Network for Image Super-Resolution, CVPR 18) - """ - - def __init__(self, nc, kernel_size=3, gc=32, stride=1, bias=True, - pad_type='zero', \ - norm_type=None, act_type='leakyrelu', mode='CNA'): - super(ResidualDenseBlock_5C, self).__init__() - # gc: growth channel, i.e. 
intermediate channels - self.conv1 = conv_block(nc, gc, kernel_size, stride, bias=bias, - pad_type=pad_type, \ - norm_type=norm_type, act_type=act_type, mode=mode) - self.conv2 = conv_block(nc + gc, gc, kernel_size, stride, bias=bias, - pad_type=pad_type, \ - norm_type=norm_type, act_type=act_type, mode=mode) - self.conv3 = conv_block(nc + 2 * gc, gc, kernel_size, stride, bias=bias, - pad_type=pad_type, \ - norm_type=norm_type, act_type=act_type, mode=mode) - self.conv4 = conv_block(nc + 3 * gc, gc, kernel_size, stride, bias=bias, - pad_type=pad_type, \ - norm_type=norm_type, act_type=act_type, mode=mode) - if mode == 'CNA': - last_act = None - else: - last_act = act_type - self.conv5 = conv_block(nc + 4 * gc, nc, 3, stride, bias=bias, - pad_type=pad_type, \ - norm_type=norm_type, act_type=last_act, mode=mode) - - def forward(self, x): - x1 = self.conv1(x) - x2 = self.conv2(torch.cat((x, x1), 1)) - x3 = self.conv3(torch.cat((x, x1, x2), 1)) - x4 = self.conv4(torch.cat((x, x1, x2, x3), 1)) - x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1)) - return x5.mul(0.2) + x - - -class RRDB(nn.Module): - """ - Residual in Residual Dense Block - """ - - def __init__(self, nc, kernel_size=3, gc=32, stride=1, bias=True, - pad_type='zero', \ - norm_type=None, act_type='leakyrelu', mode='CNA'): - super(RRDB, self).__init__() - self.RDB1 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, - pad_type, \ - norm_type, act_type, mode) - self.RDB2 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, - pad_type, \ - norm_type, act_type, mode) - self.RDB3 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, - pad_type, \ - norm_type, act_type, mode) - - def forward(self, x): - out = self.RDB1(x) - out = self.RDB2(out) - out = self.RDB3(out) - return out.mul(0.2) + x - - -#################### -# Upsampler -#################### - - -def pixelshuffle_block(in_nc, out_nc, upscale_factor=2, kernel_size=3, stride=1, - bias=True, - pad_type='zero', norm_type=None, act_type='relu'): - """ - Pixel shuffle layer - (Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional - Neural Network, CVPR17) - """ - conv = conv_block(in_nc, out_nc * (upscale_factor ** 2), kernel_size, stride, - bias=bias, - pad_type=pad_type, norm_type=None, act_type=None) - pixel_shuffle = nn.PixelShuffle(upscale_factor) - - n = norm(norm_type, out_nc) if norm_type else None - a = act(act_type) if act_type else None - return sequential(conv, pixel_shuffle, n, a) - - -def upconv_blcok(in_nc, out_nc, upscale_factor=2, kernel_size=3, stride=1, - bias=True, - pad_type='zero', norm_type=None, act_type='relu', - mode='nearest'): - # Up conv - # described in https://distill.pub/2016/deconv-checkerboard/ - upsample = nn.Upsample(scale_factor=upscale_factor, mode=mode) - conv = conv_block(in_nc, out_nc, kernel_size, stride, bias=bias, - pad_type=pad_type, norm_type=norm_type, act_type=act_type) - return sequential(upsample, conv) diff --git a/VSR/Backend/Torch/Models/frvsr/__init__.py b/VSR/Backend/Torch/Models/frvsr/__init__.py deleted file mode 100644 index c588d47..0000000 --- a/VSR/Backend/Torch/Models/frvsr/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. 
-# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/4 下午8:51 diff --git a/VSR/Backend/Torch/Models/frvsr/ops.py b/VSR/Backend/Torch/Models/frvsr/ops.py deleted file mode 100644 index d4f7ce2..0000000 --- a/VSR/Backend/Torch/Models/frvsr/ops.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/4 下午8:51 - -import torch -from torch import nn -from torch.nn.functional import interpolate -from ..Arch import Upsample - - -class BilinerUp(nn.Module): - def __init__(self, scale_factor): - super(BilinerUp, self).__init__() - self.scale = scale_factor - - def forward(self, x): - return interpolate(x, scale_factor=self.scale, - mode='bilinear', align_corners=False) - - -class FNet(nn.Module): - def __init__(self, channel, gain=32): - super(FNet, self).__init__() - f = 32 - layers = [] - in_c = channel * 2 - for i in range(3): - layers += [nn.Conv2d(in_c, f, 3, 1, 1), nn.LeakyReLU(0.2, inplace=True)] - layers += [nn.Conv2d(f, f, 3, 1, 1), nn.LeakyReLU(0.2, inplace=True)] - layers += [nn.MaxPool2d(2)] - in_c = f - f *= 2 - for i in range(3): - layers += [nn.Conv2d(in_c, f, 3, 1, 1), nn.LeakyReLU(0.2, inplace=True)] - layers += [nn.Conv2d(f, f, 3, 1, 1), nn.LeakyReLU(0.2, inplace=True)] - layers += [BilinerUp(2)] - in_c = f - f //= 2 - layers += [nn.Conv2d(in_c, f, 3, 1, 1), nn.LeakyReLU(0.2, inplace=True)] - layers += [nn.Conv2d(f, 2, 3, 1, 1), nn.Tanh()] - self.body = nn.Sequential(*layers) - self.gain = gain - - def forward(self, *inputs): - x = torch.cat(inputs, dim=1) - return self.body(x) * self.gain - - -class RB(nn.Module): - def __init__(self, channel): - super(RB, self).__init__() - conv1 = nn.Conv2d(channel, channel, 3, 1, 1) - conv2 = nn.Conv2d(channel, channel, 3, 1, 1) - self.body = nn.Sequential(conv1, nn.ReLU(True), conv2) - - def forward(self, x): - return x + self.body(x) - - -class SRNet(nn.Module): - def __init__(self, channel, scale, n_rb=10): - super(SRNet, self).__init__() - rbs = [RB(64) for _ in range(n_rb)] - entry = [nn.Conv2d(channel * (scale ** 2 + 1), 64, 3, 1, 1), nn.ReLU(True)] - up = Upsample(64, scale, method='ps') - out = nn.Conv2d(64, channel, 3, 1, 1) - self.body = nn.Sequential(*entry, *rbs, up, out) - - def forward(self, *inputs): - x = torch.cat(inputs, dim=1) - return self.body(x) diff --git a/VSR/Backend/Torch/Models/msrn/__init__.py b/VSR/Backend/Torch/Models/msrn/__init__.py deleted file mode 100644 index 163f12b..0000000 --- a/VSR/Backend/Torch/Models/msrn/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019 - 3 - 15 - -import logging -_logger = logging.getLogger("VSR.MSRN") -_logger.info("LICENSE: MSRN is implemented by Juncheng Li. 
" - "@MIVRC https://github.com/MIVRC/MSRN-PyTorch") diff --git a/VSR/Backend/Torch/Models/msrn/msrn.py b/VSR/Backend/Torch/Models/msrn/msrn.py deleted file mode 100644 index 9540d0e..0000000 --- a/VSR/Backend/Torch/Models/msrn/msrn.py +++ /dev/null @@ -1,118 +0,0 @@ -import torch -import torch.nn as nn - -from ..rcan import common - - -def make_model(args, parent=False): - return MSRN(args) - - -class MSRB(nn.Module): - def __init__(self, conv=common.default_conv, n_feats=64): - super(MSRB, self).__init__() - - kernel_size_1 = 3 - kernel_size_2 = 5 - - self.conv_3_1 = conv(n_feats, n_feats, kernel_size_1) - self.conv_3_2 = conv(n_feats * 2, n_feats * 2, kernel_size_1) - self.conv_5_1 = conv(n_feats, n_feats, kernel_size_2) - self.conv_5_2 = conv(n_feats * 2, n_feats * 2, kernel_size_2) - self.confusion = nn.Conv2d(n_feats * 4, n_feats, 1, padding=0, stride=1) - self.relu = nn.ReLU(inplace=True) - - def forward(self, x): - input_1 = x - output_3_1 = self.relu(self.conv_3_1(input_1)) - output_5_1 = self.relu(self.conv_5_1(input_1)) - input_2 = torch.cat([output_3_1, output_5_1], 1) - output_3_2 = self.relu(self.conv_3_2(input_2)) - output_5_2 = self.relu(self.conv_5_2(input_2)) - input_3 = torch.cat([output_3_2, output_5_2], 1) - output = self.confusion(input_3) - output += x - return output - - -class MSRN(nn.Module): - def __init__(self, args, conv=common.default_conv): - super(MSRN, self).__init__() - - n_feats = 64 - n_blocks = 8 - kernel_size = 3 - scale = args.scale[0] - act = nn.ReLU(True) - - self.n_blocks = n_blocks - - # RGB mean for DIV2K - rgb_mean = (0.4488, 0.4371, 0.4040) - rgb_std = (1.0, 1.0, 1.0) - self.sub_mean = common.MeanShift(args.rgb_range, rgb_mean, rgb_std) - - # define head module - modules_head = [conv(args.n_colors, n_feats, kernel_size)] - - # define body module - modules_body = nn.ModuleList() - for i in range(n_blocks): - modules_body.append( - MSRB(n_feats=n_feats)) - - # define tail module - modules_tail = [ - nn.Conv2d(n_feats * (self.n_blocks + 1), n_feats, 1, padding=0, stride=1), - conv(n_feats, n_feats, kernel_size), - common.Upsampler(conv, scale, n_feats, act=False), - conv(n_feats, args.n_colors, kernel_size)] - - self.add_mean = common.MeanShift(args.rgb_range, rgb_mean, rgb_std, 1) - - self.head = nn.Sequential(*modules_head) - self.body = nn.Sequential(*modules_body) - self.tail = nn.Sequential(*modules_tail) - - def forward(self, x): - x = self.sub_mean(x) - x = self.head(x) - res = x - - MSRB_out = [] - for i in range(self.n_blocks): - x = self.body[i](x) - MSRB_out.append(x) - MSRB_out.append(res) - - res = torch.cat(MSRB_out, 1) - x = self.tail(res) - x = self.add_mean(x) - return x - - def load_state_dict(self, state_dict, strict=False): - own_state = self.state_dict() - for name, param in state_dict.items(): - if name in own_state: - if isinstance(param, nn.Parameter): - param = param.data - try: - own_state[name].copy_(param) - except Exception: - if name.find('tail') >= 0: - print('Replace pre-trained upsampler to new one...') - else: - raise RuntimeError('While copying the parameter named {}, ' - 'whose dimensions in the model are {} and ' - 'whose dimensions in the checkpoint are {}.' 
- .format(name, own_state[name].size(), - param.size())) - elif strict: - if name.find('tail') == -1: - raise KeyError('unexpected key "{}" in state_dict' - .format(name)) - - if strict: - missing = set(own_state.keys()) - set(state_dict.keys()) - if len(missing) > 0: - raise KeyError('missing keys in state_dict: "{}"'.format(missing)) diff --git a/VSR/Backend/Torch/Models/rbpn/__init__.py b/VSR/Backend/Torch/Models/rbpn/__init__.py deleted file mode 100644 index 2c3ea08..0000000 --- a/VSR/Backend/Torch/Models/rbpn/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/5/25 下午4:38 - -import logging - -_logger = logging.getLogger("VSR.RBPN") -_logger.info("LICENSE: RBPN is implemented by M. Haris, et. al. @alterzero") -_logger.warning( - "I use unsupervised flownet to estimate optical flow, rather than pyflow module.") diff --git a/VSR/Backend/Torch/Models/rbpn/base_network.py b/VSR/Backend/Torch/Models/rbpn/base_network.py deleted file mode 100644 index df69c9f..0000000 --- a/VSR/Backend/Torch/Models/rbpn/base_network.py +++ /dev/null @@ -1,413 +0,0 @@ -import math -import torch - - -class DenseBlock(torch.nn.Module): - def __init__(self, input_size, output_size, bias=True, activation='relu', - norm='batch'): - super(DenseBlock, self).__init__() - self.fc = torch.nn.Linear(input_size, output_size, bias=bias) - - self.norm = norm - if self.norm == 'batch': - self.bn = torch.nn.BatchNorm1d(output_size) - elif self.norm == 'instance': - self.bn = torch.nn.InstanceNorm1d(output_size) - - self.activation = activation - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - if self.norm is not None: - out = self.bn(self.fc(x)) - else: - out = self.fc(x) - - if self.activation is not None: - return self.act(out) - else: - return out - - -class ConvBlock(torch.nn.Module): - def __init__(self, input_size, output_size, kernel_size=3, stride=1, - padding=1, bias=True, activation='prelu', norm=None): - super(ConvBlock, self).__init__() - self.conv = torch.nn.Conv2d(input_size, output_size, kernel_size, stride, - padding, bias=bias) - - self.norm = norm - if self.norm == 'batch': - self.bn = torch.nn.BatchNorm2d(output_size) - elif self.norm == 'instance': - self.bn = torch.nn.InstanceNorm2d(output_size) - - self.activation = activation - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - if self.norm is not None: - out = self.bn(self.conv(x)) - else: - out = self.conv(x) - - if self.activation is not None: - return self.act(out) - else: - return out - - -class DeconvBlock(torch.nn.Module): - def __init__(self, input_size, output_size, kernel_size=4, stride=2, - padding=1, bias=True, activation='prelu', norm=None): - super(DeconvBlock, self).__init__() - self.deconv = torch.nn.ConvTranspose2d(input_size, output_size, kernel_size, - stride, padding, bias=bias) - - self.norm = norm - if 
self.norm == 'batch': - self.bn = torch.nn.BatchNorm2d(output_size) - elif self.norm == 'instance': - self.bn = torch.nn.InstanceNorm2d(output_size) - - self.activation = activation - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - if self.norm is not None: - out = self.bn(self.deconv(x)) - else: - out = self.deconv(x) - - if self.activation is not None: - return self.act(out) - else: - return out - - -class ResnetBlock(torch.nn.Module): - def __init__(self, num_filter, kernel_size=3, stride=1, padding=1, bias=True, - activation='prelu', norm='batch'): - super(ResnetBlock, self).__init__() - self.conv1 = torch.nn.Conv2d(num_filter, num_filter, kernel_size, stride, - padding, bias=bias) - self.conv2 = torch.nn.Conv2d(num_filter, num_filter, kernel_size, stride, - padding, bias=bias) - - self.norm = norm - if self.norm == 'batch': - self.bn = torch.nn.BatchNorm2d(num_filter) - elif norm == 'instance': - self.bn = torch.nn.InstanceNorm2d(num_filter) - - self.activation = activation - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - residual = x - if self.norm is not None: - out = self.bn(self.conv1(x)) - else: - out = self.conv1(x) - - if self.activation is not None: - out = self.act(out) - - if self.norm is not None: - out = self.bn(self.conv2(out)) - else: - out = self.conv2(out) - - out = torch.add(out, residual) - - if self.activation is not None: - out = self.act(out) - - return out - - -class UpBlock(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, bias=True, - activation='prelu', norm=None): - super(UpBlock, self).__init__() - self.up_conv1 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv2 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv3 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - h0 = self.up_conv1(x) - l0 = self.up_conv2(h0) - h1 = self.up_conv3(l0 - x) - return h1 + h0 - - -class UpBlockPix(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, scale=4, - bias=True, activation='prelu', norm=None): - super(UpBlockPix, self).__init__() - self.up_conv1 = Upsampler(scale, num_filter) - self.up_conv2 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv3 = Upsampler(scale, num_filter) - - def forward(self, x): - h0 = self.up_conv1(x) - l0 = self.up_conv2(h0) - h1 = self.up_conv3(l0 - x) - return h1 + h0 - - -class D_UpBlock(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, - num_stages=1, bias=True, activation='prelu', norm=None): - super(D_UpBlock, self).__init__() - self.conv = ConvBlock(num_filter * num_stages, num_filter, 1, 1, 0, - activation, norm=None) - self.up_conv1 = DeconvBlock(num_filter, num_filter, 
kernel_size, stride, - padding, activation, norm=None) - self.up_conv2 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv3 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - x = self.conv(x) - h0 = self.up_conv1(x) - l0 = self.up_conv2(h0) - h1 = self.up_conv3(l0 - x) - return h1 + h0 - - -class D_UpBlockPix(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, - num_stages=1, scale=4, bias=True, activation='prelu', norm=None): - super(D_UpBlockPix, self).__init__() - self.conv = ConvBlock(num_filter * num_stages, num_filter, 1, 1, 0, - activation, norm=None) - self.up_conv1 = Upsampler(scale, num_filter) - self.up_conv2 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.up_conv3 = Upsampler(scale, num_filter) - - def forward(self, x): - x = self.conv(x) - h0 = self.up_conv1(x) - l0 = self.up_conv2(h0) - h1 = self.up_conv3(l0 - x) - return h1 + h0 - - -class DownBlock(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, bias=True, - activation='prelu', norm=None): - super(DownBlock, self).__init__() - self.down_conv1 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv2 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv3 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - l0 = self.down_conv1(x) - h0 = self.down_conv2(l0) - l1 = self.down_conv3(h0 - x) - return l1 + l0 - - -class DownBlockPix(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, scale=4, - bias=True, activation='prelu', norm=None): - super(DownBlockPix, self).__init__() - self.down_conv1 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv2 = Upsampler(scale, num_filter) - self.down_conv3 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - l0 = self.down_conv1(x) - h0 = self.down_conv2(l0) - l1 = self.down_conv3(h0 - x) - return l1 + l0 - - -class D_DownBlock(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, - num_stages=1, bias=True, activation='prelu', norm=None): - super(D_DownBlock, self).__init__() - self.conv = ConvBlock(num_filter * num_stages, num_filter, 1, 1, 0, - activation, norm=None) - self.down_conv1 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv2 = DeconvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv3 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - x = self.conv(x) - l0 = self.down_conv1(x) - h0 = self.down_conv2(l0) - l1 = self.down_conv3(h0 - x) - return l1 + l0 - - -class D_DownBlockPix(torch.nn.Module): - def __init__(self, num_filter, kernel_size=8, stride=4, padding=2, - num_stages=1, scale=4, bias=True, activation='prelu', norm=None): - super(D_DownBlockPix, self).__init__() - self.conv = ConvBlock(num_filter * num_stages, num_filter, 1, 1, 0, - activation, norm=None) - self.down_conv1 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - self.down_conv2 = Upsampler(scale, 
num_filter) - self.down_conv3 = ConvBlock(num_filter, num_filter, kernel_size, stride, - padding, activation, norm=None) - - def forward(self, x): - x = self.conv(x) - l0 = self.down_conv1(x) - h0 = self.down_conv2(l0) - l1 = self.down_conv3(h0 - x) - return l1 + l0 - - -class PSBlock(torch.nn.Module): - def __init__(self, input_size, output_size, scale_factor, kernel_size=3, - stride=1, padding=1, bias=True, activation='prelu', - norm='batch'): - super(PSBlock, self).__init__() - self.conv = torch.nn.Conv2d(input_size, output_size * scale_factor ** 2, - kernel_size, stride, padding, bias=bias) - self.ps = torch.nn.PixelShuffle(scale_factor) - - self.norm = norm - if self.norm == 'batch': - self.bn = torch.nn.BatchNorm2d(output_size) - elif norm == 'instance': - self.bn = torch.nn.InstanceNorm2d(output_size) - - self.activation = activation - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - if self.norm is not None: - out = self.bn(self.ps(self.conv(x))) - else: - out = self.ps(self.conv(x)) - - if self.activation is not None: - out = self.act(out) - return out - - -class Upsampler(torch.nn.Module): - def __init__(self, scale, n_feat, bn=False, act='prelu', bias=True): - super(Upsampler, self).__init__() - modules = [] - for _ in range(int(math.log(scale, 2))): - modules.append( - ConvBlock(n_feat, 4 * n_feat, 3, 1, 1, bias, activation=None, - norm=None)) - modules.append(torch.nn.PixelShuffle(2)) - if bn: modules.append(torch.nn.BatchNorm2d(n_feat)) - # modules.append(torch.nn.PReLU()) - self.up = torch.nn.Sequential(*modules) - - self.activation = act - if self.activation == 'relu': - self.act = torch.nn.ReLU(True) - elif self.activation == 'prelu': - self.act = torch.nn.PReLU() - elif self.activation == 'lrelu': - self.act = torch.nn.LeakyReLU(0.2, True) - elif self.activation == 'tanh': - self.act = torch.nn.Tanh() - elif self.activation == 'sigmoid': - self.act = torch.nn.Sigmoid() - - def forward(self, x): - out = self.up(x) - if self.activation is not None: - out = self.act(out) - return out - - -class Upsample2xBlock(torch.nn.Module): - def __init__(self, input_size, output_size, bias=True, upsample='deconv', - activation='relu', norm='batch'): - super(Upsample2xBlock, self).__init__() - scale_factor = 2 - # 1. Deconvolution (Transposed convolution) - if upsample == 'deconv': - self.upsample = DeconvBlock(input_size, output_size, - kernel_size=4, stride=2, padding=1, - bias=bias, activation=activation, norm=norm) - - # 2. Sub-pixel convolution (Pixel shuffler) - elif upsample == 'ps': - self.upsample = PSBlock(input_size, output_size, - scale_factor=scale_factor, - bias=bias, activation=activation, norm=norm) - - # 3. 
Resize and Convolution - elif upsample == 'rnc': - self.upsample = torch.nn.Sequential( - torch.nn.Upsample(scale_factor=scale_factor, mode='nearest'), - ConvBlock(input_size, output_size, - kernel_size=3, stride=1, padding=1, - bias=bias, activation=activation, norm=norm) - ) - - def forward(self, x): - out = self.upsample(x) - return out diff --git a/VSR/Backend/Torch/Models/rbpn/ops.py b/VSR/Backend/Torch/Models/rbpn/ops.py deleted file mode 100644 index 95a7bb5..0000000 --- a/VSR/Backend/Torch/Models/rbpn/ops.py +++ /dev/null @@ -1,157 +0,0 @@ -import torch.nn as nn - -from .base_network import * - - -class Dbpns(nn.Module): - def __init__(self, base_filter, feat, num_stages, scale_factor): - super(Dbpns, self).__init__() - - if scale_factor == 2: - kernel = 6 - stride = 2 - padding = 2 - elif scale_factor == 4: - kernel = 8 - stride = 4 - padding = 2 - elif scale_factor == 8: - kernel = 12 - stride = 8 - padding = 2 - - # Initial Feature Extraction - # self.feat0 = ConvBlock(num_channels, feat, 3, 1, 1, activation='prelu', norm=None) - self.feat1 = ConvBlock(base_filter, feat, 1, 1, 0, activation='prelu', - norm=None) - # Back-projection stages - self.up1 = UpBlock(feat, kernel, stride, padding) - self.down1 = DownBlock(feat, kernel, stride, padding) - self.up2 = UpBlock(feat, kernel, stride, padding) - self.down2 = DownBlock(feat, kernel, stride, padding) - self.up3 = UpBlock(feat, kernel, stride, padding) - # Reconstruction - self.output = ConvBlock(num_stages * feat, feat, 1, 1, 0, activation=None, - norm=None) - - for m in self.modules(): - classname = m.__class__.__name__ - if classname.find('Conv2d') != -1: - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - elif classname.find('ConvTranspose2d') != -1: - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - - def forward(self, x): - # x = self.feat0(x) - x = self.feat1(x) - - h1 = self.up1(x) - h2 = self.up2(self.down1(h1)) - h3 = self.up3(self.down2(h2)) - - x = self.output(torch.cat((h3, h2, h1), 1)) - - return x - - -class Rbpn(nn.Module): - def __init__(self, num_channels, base_filter, feat, num_stages, n_resblock, - nFrames, scale_factor): - super(Rbpn, self).__init__() - # base_filter=256 - # feat=64 - self.nFrames = nFrames - - if scale_factor == 2: - kernel = 6 - stride = 2 - padding = 2 - elif scale_factor == 4: - kernel = 8 - stride = 4 - padding = 2 - elif scale_factor == 8: - kernel = 12 - stride = 8 - padding = 2 - - # Initial Feature Extraction - self.feat0 = ConvBlock(num_channels, base_filter, 3, 1, 1, - activation='prelu', norm=None) - self.feat1 = ConvBlock(8, base_filter, 3, 1, 1, activation='prelu', - norm=None) - - ###DBPNS - self.DBPN = Dbpns(base_filter, feat, num_stages, scale_factor) - - # Res-Block1 - modules_body1 = [ - ResnetBlock(base_filter, kernel_size=3, stride=1, padding=1, bias=True, - activation='prelu', norm=None) \ - for _ in range(n_resblock)] - modules_body1.append( - DeconvBlock(base_filter, feat, kernel, stride, padding, - activation='prelu', norm=None)) - self.res_feat1 = nn.Sequential(*modules_body1) - - # Res-Block2 - modules_body2 = [ - ResnetBlock(feat, kernel_size=3, stride=1, padding=1, bias=True, - activation='prelu', norm=None) \ - for _ in range(n_resblock)] - modules_body2.append( - ConvBlock(feat, feat, 3, 1, 1, activation='prelu', norm=None)) - self.res_feat2 = nn.Sequential(*modules_body2) - - # Res-Block3 - modules_body3 = [ - ResnetBlock(feat, kernel_size=3, stride=1, padding=1, bias=True, - 
activation='prelu', norm=None) \ - for _ in range(n_resblock)] - modules_body3.append(ConvBlock(feat, base_filter, kernel, stride, padding, - activation='prelu', norm=None)) - self.res_feat3 = nn.Sequential(*modules_body3) - - # Reconstruction - self.output = ConvBlock((nFrames - 1) * feat, num_channels, 3, 1, 1, - activation=None, norm=None) - - for m in self.modules(): - classname = m.__class__.__name__ - if classname.find('Conv2d') != -1: - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - elif classname.find('ConvTranspose2d') != -1: - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - - def forward(self, x, neigbor, flow): - ### initial feature extraction - feat_input = self.feat0(x) - feat_frame = [] - for j in range(len(neigbor)): - feat_frame.append(self.feat1(torch.cat((x, neigbor[j], flow[j]), 1))) - - ####Projection - Ht = [] - for j in range(len(neigbor)): - h0 = self.DBPN(feat_input) - h1 = self.res_feat1(feat_frame[j]) - - e = h0 - h1 - e = self.res_feat2(e) - h = h0 + e - Ht.append(h) - feat_input = self.res_feat3(h) - - ####Reconstruction - out = torch.cat(Ht, 1) - output = self.output(out) - - return output diff --git a/VSR/Backend/Torch/Models/rcan/__init__.py b/VSR/Backend/Torch/Models/rcan/__init__.py deleted file mode 100644 index a910893..0000000 --- a/VSR/Backend/Torch/Models/rcan/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019 - 3 - 15 - -import logging -_logger = logging.getLogger("VSR.RCAN") -_logger.info("LICENSE: RCAN is implemented by Yulun Zhang. " - "@yulunzhang https://github.com/yulunzhang/RCAN.") diff --git a/VSR/Backend/Torch/Models/rcan/common.py b/VSR/Backend/Torch/Models/rcan/common.py deleted file mode 100644 index 8aa0cee..0000000 --- a/VSR/Backend/Torch/Models/rcan/common.py +++ /dev/null @@ -1,78 +0,0 @@ -import math - -import torch -import torch.nn as nn - - -def default_conv(in_channels, out_channels, kernel_size, bias=True): - return nn.Conv2d( - in_channels, out_channels, kernel_size, - padding=(kernel_size // 2), bias=bias) - - -class MeanShift(nn.Conv2d): - def __init__(self, rgb_range, rgb_mean, rgb_std, sign=-1): - super(MeanShift, self).__init__(3, 3, kernel_size=1) - std = torch.Tensor(rgb_std) - self.weight.data = torch.eye(3).view(3, 3, 1, 1) - self.weight.data.div_(std.view(3, 1, 1, 1)) - self.bias.data = sign * rgb_range * torch.Tensor(rgb_mean) - self.bias.data.div_(std) - self.requires_grad = False - - -class BasicBlock(nn.Sequential): - def __init__( - self, in_channels, out_channels, kernel_size, stride=1, bias=False, - bn=True, act=nn.ReLU(True)): - - m = [nn.Conv2d( - in_channels, out_channels, kernel_size, - padding=(kernel_size // 2), stride=stride, bias=bias) - ] - if bn: m.append(nn.BatchNorm2d(out_channels)) - if act is not None: m.append(act) - super(BasicBlock, self).__init__(*m) - - -class ResBlock(nn.Module): - def __init__( - self, conv, n_feat, kernel_size, - bias=True, bn=False, act=nn.ReLU(True), res_scale=1): - - super(ResBlock, self).__init__() - m = [] - for i in range(2): - m.append(conv(n_feat, n_feat, kernel_size, bias=bias)) - if bn: m.append(nn.BatchNorm2d(n_feat)) - if i == 0: m.append(act) - - self.body = nn.Sequential(*m) - self.res_scale = res_scale - - def forward(self, x): - res = self.body(x).mul(self.res_scale) - res += x - - return res - - -class Upsampler(nn.Sequential): - def __init__(self, conv, scale, n_feat, 
bn=False, act=False, bias=True): - - m = [] - if (scale & (scale - 1)) == 0: # Is scale = 2^n? - for _ in range(int(math.log(scale, 2))): - m.append(conv(n_feat, 4 * n_feat, 3, bias)) - m.append(nn.PixelShuffle(2)) - if bn: m.append(nn.BatchNorm2d(n_feat)) - if act: m.append(act()) - elif scale == 3: - m.append(conv(n_feat, 9 * n_feat, 3, bias)) - m.append(nn.PixelShuffle(3)) - if bn: m.append(nn.BatchNorm2d(n_feat)) - if act: m.append(act()) - else: - raise NotImplementedError - - super(Upsampler, self).__init__(*m) diff --git a/VSR/Backend/Torch/Models/rcan/rcan.py b/VSR/Backend/Torch/Models/rcan/rcan.py deleted file mode 100644 index 02b2488..0000000 --- a/VSR/Backend/Torch/Models/rcan/rcan.py +++ /dev/null @@ -1,151 +0,0 @@ -import torch.nn as nn - -from . import common - - -def make_model(args, parent=False): - return RCAN(args) - - -## Channel Attention (CA) Layer -class CALayer(nn.Module): - def __init__(self, channel, reduction=16): - super(CALayer, self).__init__() - # global average pooling: feature --> point - self.avg_pool = nn.AdaptiveAvgPool2d(1) - # feature channel downscale and upscale --> channel weight - self.conv_du = nn.Sequential( - nn.Conv2d(channel, channel // reduction, 1, padding=0, bias=True), - nn.ReLU(inplace=True), - nn.Conv2d(channel // reduction, channel, 1, padding=0, bias=True), - nn.Sigmoid() - ) - - def forward(self, x): - y = self.avg_pool(x) - y = self.conv_du(y) - return x * y - - -## Residual Channel Attention Block (RCAB) -class RCAB(nn.Module): - def __init__( - self, conv, n_feat, kernel_size, reduction, - bias=True, bn=False, act=nn.ReLU(True), res_scale=1): - - super(RCAB, self).__init__() - modules_body = [] - for i in range(2): - modules_body.append(conv(n_feat, n_feat, kernel_size, bias=bias)) - if bn: modules_body.append(nn.BatchNorm2d(n_feat)) - if i == 0: modules_body.append(act) - modules_body.append(CALayer(n_feat, reduction)) - self.body = nn.Sequential(*modules_body) - self.res_scale = res_scale - - def forward(self, x): - res = self.body(x) - # res = self.body(x).mul(self.res_scale) - res += x - return res - - -## Residual Group (RG) -class ResidualGroup(nn.Module): - def __init__(self, conv, n_feat, kernel_size, reduction, act, res_scale, - n_resblocks): - super(ResidualGroup, self).__init__() - modules_body = [] - modules_body = [ - RCAB( - conv, n_feat, kernel_size, reduction, bias=True, bn=False, - act=nn.ReLU(True), res_scale=1) \ - for _ in range(n_resblocks)] - modules_body.append(conv(n_feat, n_feat, kernel_size)) - self.body = nn.Sequential(*modules_body) - - def forward(self, x): - res = self.body(x) - res += x - return res - - -## Residual Channel Attention Network (RCAN) -class RCAN(nn.Module): - def __init__(self, args, conv=common.default_conv): - super(RCAN, self).__init__() - - n_resgroups = args.n_resgroups - n_resblocks = args.n_resblocks - n_feats = args.n_feats - kernel_size = 3 - reduction = args.reduction - scale = args.scale[0] - act = nn.ReLU(True) - - # RGB mean for DIV2K - rgb_mean = (0.4488, 0.4371, 0.4040) - rgb_std = (1.0, 1.0, 1.0) - self.sub_mean = common.MeanShift(args.rgb_range, rgb_mean, rgb_std) - - # define head module - modules_head = [conv(args.n_colors, n_feats, kernel_size)] - - # define body module - modules_body = [ - ResidualGroup( - conv, n_feats, kernel_size, reduction, act=act, - res_scale=args.res_scale, n_resblocks=n_resblocks) \ - for _ in range(n_resgroups)] - - modules_body.append(conv(n_feats, n_feats, kernel_size)) - - # define tail module - modules_tail = [ - common.Upsampler(conv, 
scale, n_feats, act=False), - conv(n_feats, args.n_colors, kernel_size)] - - self.add_mean = common.MeanShift(args.rgb_range, rgb_mean, rgb_std, 1) - - self.head = nn.Sequential(*modules_head) - self.body = nn.Sequential(*modules_body) - self.tail = nn.Sequential(*modules_tail) - - def forward(self, x): - x = self.sub_mean(x) - x = self.head(x) - - res = self.body(x) - res += x - - x = self.tail(res) - x = self.add_mean(x) - - return x - - def load_state_dict(self, state_dict, strict=False): - own_state = self.state_dict() - for name, param in state_dict.items(): - if name in own_state: - if isinstance(param, nn.Parameter): - param = param.data - try: - own_state[name].copy_(param) - except Exception: - if name.find('tail') >= 0: - print('Replace pre-trained upsampler to new one...') - else: - raise RuntimeError('While copying the parameter named {}, ' - 'whose dimensions in the model are {} and ' - 'whose dimensions in the checkpoint are {}.' - .format(name, own_state[name].size(), - param.size())) - elif strict: - if name.find('tail') == -1: - raise KeyError('unexpected key "{}" in state_dict' - .format(name)) - - if strict: - missing = set(own_state.keys()) - set(state_dict.keys()) - if len(missing) > 0: - raise KeyError('missing keys in state_dict: "{}"'.format(missing)) diff --git a/VSR/Backend/Torch/Models/sof/__init__.py b/VSR/Backend/Torch/Models/sof/__init__.py deleted file mode 100644 index c935ba4..0000000 --- a/VSR/Backend/Torch/Models/sof/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019 - 3 - 22 - -import logging -_logger = logging.getLogger("VSR.SOF") -_logger.info("LICENSE: SOF-VSR is implemented by Longguan Wang. " - "@LongguanWang https://github.com/LongguangWang/SOF-VSR.") diff --git a/VSR/Backend/Torch/Models/sof/modules.py b/VSR/Backend/Torch/Models/sof/modules.py deleted file mode 100644 index 7e5fa2a..0000000 --- a/VSR/Backend/Torch/Models/sof/modules.py +++ /dev/null @@ -1,201 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable - - -def optical_flow_warp(image, image_optical_flow): - """ - Arguments - image_ref: reference images tensor, (b, c, h, w) - image_optical_flow: optical flow to image_ref (b, 2, h, w) - """ - b, _, h, w = image.size() - grid = np.meshgrid(range(w), range(h)) - grid = np.stack(grid, axis=-1).astype(np.float64) - grid[:, :, 0] = grid[:, :, 0] * 2 / (w - 1) - 1 - grid[:, :, 1] = grid[:, :, 1] * 2 / (h - 1) - 1 - grid = grid.transpose(2, 0, 1) - grid = np.tile(grid, (b, 1, 1, 1)) - grid = Variable(torch.Tensor(grid)) - if image_optical_flow.is_cuda == True: - grid = grid.cuda() - - flow_0 = torch.unsqueeze(image_optical_flow[:, 0, :, :] * 31 / (w - 1), dim=1) - flow_1 = torch.unsqueeze(image_optical_flow[:, 1, :, :] * 31 / (h - 1), dim=1) - grid = grid + torch.cat((flow_0, flow_1), 1) - grid = grid.transpose(1, 2) - grid = grid.transpose(3, 2) - output = F.grid_sample(image, grid, padding_mode='border') - return output - - -class make_dense(nn.Module): - def __init__(self, channels_in, channels_out, kernel_size=3): - super(make_dense, self).__init__() - self.leaky_relu = nn.LeakyReLU(0.1, inplace=True) - self.conv = nn.Conv2d(channels_in, channels_out, kernel_size=kernel_size, - padding=(kernel_size - 1) // 2, - bias=False) - - def forward(self, x): - out = self.leaky_relu(self.conv(x)) - out = torch.cat((x, out), 1) - return out - - -class RDB(nn.Module): - 
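    """Residual dense block: `nDenselayer` densely connected 3x3 convs, each
    growing the feature map by `growth` channels, fused back to `channels` by
    a 1x1 conv and added to the input (the RDB design of RDN, CVPR 2018).
    E.g. channels=32, growth=32, nDenselayer=4 yields 32->64->96->128->160
    features before the 1x1 fusion.
    """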
def __init__(self, nDenselayer, channels, growth): - super(RDB, self).__init__() - modules = [] - channels_buffer = channels - for i in range(nDenselayer): - modules.append(make_dense(channels_buffer, growth)) - channels_buffer += growth - self.dense_layers = nn.Sequential(*modules) - self.conv_1x1 = nn.Conv2d(channels_buffer, channels, kernel_size=1, - padding=0, bias=False) - - def forward(self, x): - out = self.dense_layers(x) - out = self.conv_1x1(out) - out = out + x - return out - - -class OFRnet(nn.Module): - def __init__(self, upscale_factor): - super(OFRnet, self).__init__() - self.pool = nn.AvgPool2d(kernel_size=2) - self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', - align_corners=False) - self.final_upsample = nn.Upsample(scale_factor=upscale_factor, - mode='bilinear', align_corners=False) - self.shuffle = nn.PixelShuffle(upscale_factor) - self.upscale_factor = upscale_factor - # Level 1 - self.conv_L1_1 = nn.Conv2d(2, 32, 3, 1, 1, bias=False) - self.RDB1_1 = RDB(4, 32, 32) - self.RDB1_2 = RDB(4, 32, 32) - self.bottleneck_L1 = nn.Conv2d(64, 2, 3, 1, 1, bias=False) - self.conv_L1_2 = nn.Conv2d(2, 2, 3, 1, 1, bias=True) - # Level 2 - self.conv_L2_1 = nn.Conv2d(6, 32, 3, 1, 1, bias=False) - self.RDB2_1 = RDB(4, 32, 32) - self.RDB2_2 = RDB(4, 32, 32) - self.bottleneck_L2 = nn.Conv2d(64, 2, 3, 1, 1, bias=False) - self.conv_L2_2 = nn.Conv2d(2, 2, 3, 1, 1, bias=True) - # Level 3 - self.conv_L3_1 = nn.Conv2d(6, 32, 3, 1, 1, bias=False) - self.RDB3_1 = RDB(4, 32, 32) - self.RDB3_2 = RDB(4, 32, 32) - self.bottleneck_L3 = nn.Conv2d(64, 2 * upscale_factor ** 2, 3, 1, 1, - bias=False) - self.conv_L3_2 = nn.Conv2d(2 * upscale_factor ** 2, 2 * upscale_factor ** 2, - 3, 1, 1, bias=True) - - def forward(self, x): - # Level 1 - x_L1 = self.pool(x) - _, _, h, w = x_L1.size() - input_L1 = self.conv_L1_1(x_L1) - buffer_1 = self.RDB1_1(input_L1) - buffer_2 = self.RDB1_2(buffer_1) - buffer = torch.cat((buffer_1, buffer_2), 1) - optical_flow_L1 = self.bottleneck_L1(buffer) - optical_flow_L1 = self.conv_L1_2(optical_flow_L1) - optical_flow_L1_upscaled = self.upsample(optical_flow_L1) # *2 - # x_L1_res = optical_flow_warp(torch.unsqueeze(x_L1[:, 0, :, :], dim=1), optical_flow_L1) - torch.unsqueeze(x_L1[:, 1, :, :], dim=1) - # Level 2 - x_L2 = optical_flow_warp(torch.unsqueeze(x[:, 0, :, :], dim=1), - optical_flow_L1_upscaled) - x_L2_res = torch.unsqueeze(x[:, 1, :, :], dim=1) - x_L2 - x_L2 = torch.cat((x, x_L2, x_L2_res, optical_flow_L1_upscaled), 1) - input_L2 = self.conv_L2_1(x_L2) - buffer_1 = self.RDB2_1(input_L2) - buffer_2 = self.RDB2_2(buffer_1) - buffer = torch.cat((buffer_1, buffer_2), 1) - optical_flow_L2 = self.bottleneck_L2(buffer) - optical_flow_L2 = self.conv_L2_2(optical_flow_L2) - optical_flow_L2 = optical_flow_L2 + optical_flow_L1_upscaled - # x_L2_res = optical_flow_warp(torch.unsqueeze(x_L2[:, 0, :, :], dim=1), optical_flow_L2) - torch.unsqueeze(x_L2[:, 1, :, :], dim=1) - # Level 3 - x_L3 = optical_flow_warp(torch.unsqueeze(x[:, 0, :, :], dim=1), - optical_flow_L2) - x_L3_res = torch.unsqueeze(x[:, 1, :, :], dim=1) - x_L3 - x_L3 = torch.cat((x, x_L3, x_L3_res, optical_flow_L2), 1) - input_L3 = self.conv_L3_1(x_L3) - buffer_1 = self.RDB3_1(input_L3) - buffer_2 = self.RDB3_2(buffer_1) - buffer = torch.cat((buffer_1, buffer_2), 1) - optical_flow_L3 = self.bottleneck_L3(buffer) - optical_flow_L3 = self.conv_L3_2(optical_flow_L3) - optical_flow_L3 = self.shuffle(optical_flow_L3) + self.final_upsample( - optical_flow_L2) # *4 - - return optical_flow_L3, optical_flow_L2, 
optical_flow_L1 - - -class SRnet(nn.Module): - def __init__(self, s, c, d): - """ - Args: - s: scale factor - c: channel numbers - d: video sequence number - """ - super(SRnet, self).__init__() - self.conv = nn.Conv2d(c * (2 * s ** 2 + d), 64, 3, 1, 1, bias=False) - self.RDB_1 = RDB(5, 64, 32) - self.RDB_2 = RDB(5, 64, 32) - self.RDB_3 = RDB(5, 64, 32) - self.RDB_4 = RDB(5, 64, 32) - self.RDB_5 = RDB(5, 64, 32) - self.bottleneck = nn.Conv2d(384, c * s ** 2, 1, 1, 0, bias=False) - self.conv_2 = nn.Conv2d(c * s ** 2, c * s ** 2, 3, 1, 1, bias=True) - self.shuffle = nn.PixelShuffle(upscale_factor=s) - - def forward(self, x): - input = self.conv(x) - buffer_1 = self.RDB_1(input) - buffer_2 = self.RDB_2(buffer_1) - buffer_3 = self.RDB_3(buffer_2) - buffer_4 = self.RDB_4(buffer_3) - buffer_5 = self.RDB_5(buffer_4) - output = torch.cat( - (buffer_1, buffer_2, buffer_3, buffer_4, buffer_5, input), 1) - output = self.bottleneck(output) - output = self.conv_2(output) - output = self.shuffle(output) - return output - - -class SOFVSR(nn.Module): - def __init__(self, scale, channel, depth): - super(SOFVSR, self).__init__() - self.upscale_factor = scale - self.c = channel - self.OFRnet = OFRnet(upscale_factor=scale) - self.SRnet = SRnet(scale, channel, depth) - - def forward(self, x): - input_01 = torch.cat((torch.unsqueeze(x[:, 0, :, :], dim=1), - torch.unsqueeze(x[:, 1, :, :], dim=1)), 1) - input_21 = torch.cat((torch.unsqueeze(x[:, 2, :, :], dim=1), - torch.unsqueeze(x[:, 1, :, :], dim=1)), 1) - flow_01_L3, flow_01_L2, flow_01_L1 = self.OFRnet(input_01) - flow_21_L3, flow_21_L2, flow_21_L1 = self.OFRnet(input_21) - draft_cube = x - for i in range(self.upscale_factor): - for j in range(self.upscale_factor): - draft_01 = optical_flow_warp(x[:, :self.c, :, :], - flow_01_L3[:, :, i::self.upscale_factor, - j::self.upscale_factor] / self.upscale_factor) - draft_21 = optical_flow_warp(x[:, self.c * 2:, :, :], - flow_21_L3[:, :, i::self.upscale_factor, - j::self.upscale_factor] / self.upscale_factor) - draft_cube = torch.cat((draft_cube, draft_01, draft_21), 1) - output = self.SRnet(draft_cube) - return output, (flow_01_L3, flow_01_L2, flow_01_L1), ( - flow_21_L3, flow_21_L2, flow_21_L1) diff --git a/VSR/Backend/Torch/Models/spmc/__init__.py b/VSR/Backend/Torch/Models/spmc/__init__.py deleted file mode 100644 index f5bc9db..0000000 --- a/VSR/Backend/Torch/Models/spmc/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/5/25 下午4:38 - -import logging - -_logger = logging.getLogger("VSR.SPMC") -_logger.info("LICENSE: SPMC is proposed by X. Tao, et. al. " - "Implemented via PyTorch by @LoSealL.") -_logger.info("LICENSE: ConvLSTM is implemented by @Kaixhin.") diff --git a/VSR/Backend/Torch/Models/spmc/ops.py b/VSR/Backend/Torch/Models/spmc/ops.py deleted file mode 100644 index b6d1895..0000000 --- a/VSR/Backend/Torch/Models/spmc/ops.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/5/26 上午11:39 - -import math -import torch -from torch import nn -from torch.autograd import Variable -from torch.nn import Parameter -from torch.nn import functional as F -from torch.nn.modules.utils import _pair - -from ..Arch import EasyConv2d -from ..video.motion import STN -from ...Util.Utility import upsample - - -class Conv2dLSTMCell(nn.Module): - """ConvLSTM cell. 
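  All gates are computed by convolutions: `weight_ih` and `weight_hh` hold the
  four input/forget/cell/output gate kernels for the input and hidden state,
  and `weight_ch` holds three peephole kernels applied to the cell state, so
  spatial structure is preserved end to end (shapes stay (N, C, H, W)).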
- Copied from https://gist.github.com/Kaixhin/57901e91e5c5a8bac3eb0cbbdd3aba81 - Special thanks to @Kaixhin - """ - - def __init__(self, in_channels, out_channels, kernel_size, stride=1, - padding=0, dilation=1, groups=1, bias=True): - - super(Conv2dLSTMCell, self).__init__() - if in_channels % groups != 0: - raise ValueError('in_channels must be divisible by groups') - if out_channels % groups != 0: - raise ValueError('out_channels must be divisible by groups') - kernel_size = _pair(kernel_size) - stride = _pair(stride) - padding = _pair(padding) - dilation = _pair(dilation) - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.padding = padding - self.padding_h = tuple( - k // 2 for k, s, p, d in zip(kernel_size, stride, padding, dilation)) - self.dilation = dilation - self.groups = groups - self.weight_ih = Parameter( - torch.Tensor(4 * out_channels, in_channels // groups, *kernel_size)) - self.weight_hh = Parameter( - torch.Tensor(4 * out_channels, out_channels // groups, *kernel_size)) - self.weight_ch = Parameter( - torch.Tensor(3 * out_channels, out_channels // groups, *kernel_size)) - if bias: - self.bias_ih = Parameter(torch.Tensor(4 * out_channels)) - self.bias_hh = Parameter(torch.Tensor(4 * out_channels)) - self.bias_ch = Parameter(torch.Tensor(3 * out_channels)) - else: - self.register_parameter('bias_ih', None) - self.register_parameter('bias_hh', None) - self.register_parameter('bias_ch', None) - self.register_buffer('wc_blank', torch.zeros(out_channels)) - self.reset_parameters() - - def reset_parameters(self): - n = 4 * self.in_channels - for k in self.kernel_size: - n *= k - stdv = 1. / math.sqrt(n) - self.weight_ih.data.uniform_(-stdv, stdv) - self.weight_hh.data.uniform_(-stdv, stdv) - self.weight_ch.data.uniform_(-stdv, stdv) - if self.bias_ih is not None: - self.bias_ih.data.uniform_(-stdv, stdv) - self.bias_hh.data.uniform_(-stdv, stdv) - self.bias_ch.data.uniform_(-stdv, stdv) - - def forward(self, input, hx): - h_0, c_0 = hx - - wx = F.conv2d(input, self.weight_ih, self.bias_ih, self.stride, - self.padding, self.dilation, self.groups) - wh = F.conv2d(h_0, self.weight_hh, self.bias_hh, self.stride, - self.padding_h, self.dilation, self.groups) - # Cell uses a Hadamard product instead of a convolution? 
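    # Strictly, the peephole LSTM (Gers & Schmidhuber) uses an element-wise
    # (Hadamard) product with the cell state; here it is approximated by a
    # convolution over c_0. The zero `wc_blank` buffer is spliced in below so
    # the three peephole terms (i, f, o) line up with the 4-gate layout of
    # wx + wh (the cell-candidate slot g gets no peephole).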
- wc = F.conv2d(c_0, self.weight_ch, self.bias_ch, self.stride, - self.padding_h, self.dilation, self.groups) - v = Variable(self.wc_blank).reshape((1, -1, 1, 1)) - wxhc = wx + wh + torch.cat((wc[:, :2 * self.out_channels], - v.expand(wc.size(0), wc.size(1) // 3, - wc.size(2), wc.size(3)), - wc[:, 2 * self.out_channels:]), 1) - - i = torch.sigmoid(wxhc[:, :self.out_channels]) - f = torch.sigmoid(wxhc[:, self.out_channels:2 * self.out_channels]) - g = torch.tanh(wxhc[:, 2 * self.out_channels:3 * self.out_channels]) - o = torch.sigmoid(wxhc[:, 3 * self.out_channels:]) - - c_1 = f * c_0 + i * g - h_1 = o * torch.tanh(c_1) - return h_1, (h_1, c_1) - - -class ZeroUpsample(nn.Module): - def __init__(self, scale_factor): - super(ZeroUpsample, self).__init__() - self.ps = nn.PixelShuffle(scale_factor) - self.scale = scale_factor - - def forward(self, x): - z = torch.zeros_like(x).repeat_interleave(self.scale ** 2 - 1, dim=1) - x = torch.cat((x, z), dim=1) - return self.ps(x) - - -class SPMC(nn.Module): - def __init__(self, scale): - super(SPMC, self).__init__() - self.zero_up = ZeroUpsample(scale) - self.warper = STN() - self.scale = scale - - def forward(self, x, u=0, v=0, flow=None): - if flow is not None: - u = flow[:, 0] - v = flow[:, 1] - x2 = self.zero_up(x) - u2 = self.zero_up(u.unsqueeze(1)) * self.scale - v2 = self.zero_up(v.unsqueeze(1)) * self.scale - return self.warper(x2, u2.squeeze(1), v2.squeeze(1)) - - -class MotionEstimation(nn.Module): - def __init__(self, channel, gain=32): - super(MotionEstimation, self).__init__() - self.gain = gain - in_c = channel * 2 - # Coarse Flow - conv1 = nn.Sequential(nn.Conv2d(in_c, 24, 5, 2, 2), nn.ReLU(True)) - conv2 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) - conv3 = nn.Sequential(nn.Conv2d(24, 24, 5, 2, 2), nn.ReLU(True)) - conv4 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) - conv5 = nn.Sequential(nn.Conv2d(24, 32, 3, 1, 1), nn.Tanh()) - up1 = nn.PixelShuffle(4) - self.coarse_flow = nn.Sequential(conv1, conv2, conv3, conv4, conv5, up1) - # Fine Flow - in_c = channel * 3 + 2 - conv1 = nn.Sequential(nn.Conv2d(in_c, 24, 5, 2, 2), nn.ReLU(True)) - conv2 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) - conv3 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) - conv4 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) - conv5 = nn.Sequential(nn.Conv2d(24, 8, 3, 1, 1), nn.Tanh()) - up2 = nn.PixelShuffle(2) - self.fine_flow = nn.Sequential(conv1, conv2, conv3, conv4, conv5, up2) - self.warper = STN(padding_mode='border') - - def forward(self, target, ref, to_tuple=None): - flow0 = self.coarse_flow(torch.cat((ref, target), dim=1)) - w0 = self.warper(ref, flow0[:, 0], flow0[:, 1]) - flow_res = self.fine_flow(torch.cat((ref, target, flow0, w0), dim=1)) - flow1 = (flow_res + flow0) * self.gain - if to_tuple: - return flow1[:, 0], flow1[:, 1] - return flow1 - - -class DetailFusion(nn.Module): - def __init__(self, channel, base_filter): - super(DetailFusion, self).__init__() - f = base_filter - self.enc1 = EasyConv2d(channel, f, 5, activation='relu') - self.enc2 = nn.Sequential( - EasyConv2d(f, f * 2, 3, 2, activation='relu'), - EasyConv2d(f * 2, f * 2, 3, activation='relu')) - self.enc3 = EasyConv2d(f * 2, f * 4, 3, 2, activation='relu') - self.lstm = Conv2dLSTMCell(f * 4, f * 4, 3, 1, 1) - self.dec1 = nn.Sequential( - EasyConv2d(f * 4, f * 4, 3, activation='relu'), - nn.ConvTranspose2d(f * 4, f * 2, 4, 2, 1), - nn.ReLU(True)) - self.dec2 = nn.Sequential( - EasyConv2d(f * 2, f * 2, 3, 
activation='relu'), - nn.ConvTranspose2d(f * 2, f, 4, 2, 1), - nn.ReLU(True)) - self.dec3 = nn.Sequential( - EasyConv2d(f, f, 3, activation='relu'), - EasyConv2d(f, channel, 5)) - - def forward(self, x, hx): - add1 = self.enc1(x) - add2 = self.enc2(add1) - h0 = self.enc3(add2) - x, hx = self.lstm(h0, hx) - x = self.dec1(x) - x = self.dec2(x + add2) - x = self.dec3(x + add1) - return x, hx - - -class DetailRevealer(nn.Module): - def __init__(self, scale, channel, **kwargs): - super(DetailRevealer, self).__init__() - self.base_filter = kwargs.get('base_filter', 32) - self.me = MotionEstimation(channel, gain=kwargs.get('gain', 32)) - self.spmc = SPMC(scale) - self.vsr = DetailFusion(channel, self.base_filter) - self.scale = scale - self.hidden_state = None - - def reset(self): - self.hidden_state = None - - def forward(self, target, ref): - flow = self.me(target, ref) - hr_ref = self.spmc(ref, flow=flow) - hr_target = upsample(target, self.scale) - if self.hidden_state is None: - batch, _, height, width = hr_ref.shape - hidden_shape = (batch, self.base_filter * 4, height // 4, width // 4) - hx = (torch.zeros(hidden_shape, device=ref.device), - torch.zeros(hidden_shape, device=ref.device)) - else: - hx = self.hidden_state - res, hx = self.vsr(hr_ref, hx) - sr = hr_target + res - self.hidden_state = hx - return sr, flow diff --git a/VSR/Backend/Torch/Models/srfeat/__init__.py b/VSR/Backend/Torch/Models/srfeat/__init__.py deleted file mode 100644 index cc297c5..0000000 --- a/VSR/Backend/Torch/Models/srfeat/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/27 下午11:06 - -import logging - -_logger = logging.getLogger("VSR.SRFEAT") -_logger.info("LICENSE: SRFeat is proposed by S. Park, et. al. " - "Implemented via PyTorch by @LoSealL.") diff --git a/VSR/Backend/Torch/Models/srfeat/ops.py b/VSR/Backend/Torch/Models/srfeat/ops.py deleted file mode 100644 index 1684389..0000000 --- a/VSR/Backend/Torch/Models/srfeat/ops.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/27 下午11:06 - -import torch -import torch.nn as nn - -from ..Arch import RB, Upsample - - -class Generator(nn.Module): - """Generator for SRFeat: Single Image Super-Resolution with Feature Discrimination (ECCV 2018) - - """ - - def __init__(self, channel, scale, filters, num_rb): - super(Generator, self).__init__() - self.head = nn.Conv2d(channel, filters, 9, 1, 4) - for i in range(num_rb): - setattr(self, f'rb_{i:02d}', RB(filters, 3, 'lrelu', use_bn=True)) - setattr(self, f'merge_{i:02d}', nn.Conv2d(filters, filters, 1)) - self.tail = nn.Sequential( - Upsample(filters, scale), - nn.Conv2d(filters, channel, 3, 1, 1)) - self.num_rb = num_rb - - def forward(self, inputs): - x = self.head(inputs) - feat = [] - for i in range(self.num_rb): - x = getattr(self, f'rb_{i:02d}')(x) - feat.append(getattr(self, f'merge_{i:02d}')(x)) - x = self.tail(x + torch.stack(feat, dim=0).sum(0).squeeze(0)) - return x diff --git a/VSR/Backend/Torch/Models/srmd/__init__.py b/VSR/Backend/Torch/Models/srmd/__init__.py deleted file mode 100644 index cc3be81..0000000 --- a/VSR/Backend/Torch/Models/srmd/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) 2017-2020 Wenyi Tang. 
-# Author: Wenyi Tang -# Email: wenyitang@outlook.com -# Update: 2020 - 2 - 11 - -import logging - -logging.getLogger("VSR.SRFEAT").info( - "LICENSE: SRMD is proposed by Kai Zhang, et. al. " - "Implemented via PyTorch by @LoSealL.") diff --git a/VSR/Backend/Torch/Models/srmd/ops.py b/VSR/Backend/Torch/Models/srmd/ops.py deleted file mode 100644 index 81f5239..0000000 --- a/VSR/Backend/Torch/Models/srmd/ops.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2017-2020 Wenyi Tang. -# Author: Wenyi Tang -# Email: wenyitang@outlook.com -# Update: 2020 - 2 - 11 - -import torch -import torch.nn as nn - -from ..Arch import EasyConv2d - - -class Net(nn.Module): - """ - SRMD CNN network. 12 conv layers - """ - - def __init__(self, scale=4, channels=3, layers=12, filters=128, - pca_length=15): - super(Net, self).__init__() - self.pca_length = pca_length - net = [EasyConv2d(channels + pca_length + 1, filters, 3, activation='relu')] - net += [EasyConv2d(filters, filters, 3, activation='relu') for _ in - range(layers - 2)] - net += [EasyConv2d(filters, channels * scale ** 2, 3), - nn.PixelShuffle(scale)] - self.body = nn.Sequential(*net) - - def forward(self, x, kernel=None, noise=None): - if kernel is None and noise is None: - kernel = torch.zeros(x.shape[0], 15, 1, device=x.device, dtype=x.dtype) - noise = torch.zeros(x.shape[0], 1, 1, device=x.device, dtype=x.dtype) - # degradation parameter - degpar = torch.cat([kernel, noise.reshape([-1, 1, 1])], dim=1) - degpar = degpar.reshape([-1, 1 + self.pca_length, 1, 1]) - degpar = torch.ones_like(x)[:, 0:1] * degpar - _x = torch.cat([x, degpar], dim=1) - return self.body(_x) diff --git a/VSR/Backend/Torch/Models/teco/__init__.py b/VSR/Backend/Torch/Models/teco/__init__.py deleted file mode 100644 index 8d3aed0..0000000 --- a/VSR/Backend/Torch/Models/teco/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/27 下午2:37 - -import logging - -_logger = logging.getLogger("VSR.TecoGAN") -_logger.info("LICENSE: TecoGAN is implemented by Mengyu Chu, et. al. " - "@rachelchu https://github.com/rachelchu/TecoGAN") -_logger.warning("Training of TecoGAN hasn't been verified!!") diff --git a/VSR/Backend/Torch/Models/teco/ops.py b/VSR/Backend/Torch/Models/teco/ops.py deleted file mode 100644 index c15bf3b..0000000 --- a/VSR/Backend/Torch/Models/teco/ops.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/27 下午2:37 - -import torch -from torch import nn - -from ..Arch import EasyConv2d, RB, Upsample - - -class TecoGenerator(nn.Module): - """Generator in TecoGAN. - - Note: the flow estimation net `Fnet` shares with FRVSR. 
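  The input to `forward` is the current LR frame concatenated with the
  space-to-depth packing of the warped previous SR output, which is why the
  first conv expects `channel * (1 + scale ** 2)` input channels.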
- - Args: - filters: basic filter numbers [default: 64] - num_rb: number of residual blocks [default: 16] - """ - - def __init__(self, channel, scale, filters, num_rb): - super(TecoGenerator, self).__init__() - rbs = [] - for i in range(num_rb): - rbs.append(RB(filters, 3, 'relu')) - self.body = nn.Sequential( - EasyConv2d(channel * (1 + scale ** 2), filters, 3, activation='relu'), - *rbs, - Upsample(filters, scale, 'nearest', activation='relu'), - EasyConv2d(filters, channel, 3)) - - def forward(self, x, prev, residual=None): - """`residual` is the bicubically upsampled HR images""" - sr = self.body(torch.cat((x, prev), dim=1)) - if residual is not None: - sr += residual - return sr - - -class TecoDiscriminator(nn.Module): - def __init__(self, channel, filters, patch_size): - super(TecoDiscriminator, self).__init__() - f = filters - self.conv0 = EasyConv2d(channel * 6, f, 3, activation='leaky') - self.conv1 = EasyConv2d(f, f, 4, 2, activation='leaky', use_bn=True) - self.conv2 = EasyConv2d(f, f, 4, 2, activation='leaky', use_bn=True) - self.conv3 = EasyConv2d(f, f * 2, 4, 2, activation='leaky', use_bn=True) - self.conv4 = EasyConv2d(f * 2, f * 4, 4, 2, activation='leaky', use_bn=True) - # self.pool = nn.AdaptiveAvgPool2d(1) - self.linear = nn.Linear(f * 4 * (patch_size // 16) ** 2, 1) - - def forward(self, x): - """The inputs `x` is the concat of 8 tensors. - Note that we remove the duplicated gt/yt in paper (9 - 1 = 8). - """ - l0 = self.conv0(x) - l1 = self.conv1(l0) - l2 = self.conv2(l1) - l3 = self.conv3(l2) - l4 = self.conv4(l3) - # y = self.pool(l4) - y = self.linear(l4.flatten(1)) - return y, (l1, l2, l3, l4) diff --git a/VSR/Backend/Torch/Models/vespcn/__init__.py b/VSR/Backend/Torch/Models/vespcn/__init__.py deleted file mode 100644 index 4bddf34..0000000 --- a/VSR/Backend/Torch/Models/vespcn/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/3 下午5:10 - -import logging - -_logger = logging.getLogger("VSR.VESPCN") -_logger.info("LICENSE: VESPCN is proposed at CVPR2017 by Twitter. " - "Implemented by myself @LoSealL.") diff --git a/VSR/Backend/Torch/Models/vespcn/ops.py b/VSR/Backend/Torch/Models/vespcn/ops.py deleted file mode 100644 index 36a983b..0000000 --- a/VSR/Backend/Torch/Models/vespcn/ops.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. 
-# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/3 下午5:10 - -import torch -from torch import nn -from torch.nn import functional as F -from ..video.motion import STN - - -class RB(nn.Module): - def __init__(self, inchannels, outchannels): - super(RB, self).__init__() - self.conv1 = nn.Conv2d(inchannels, 64, 3, 1, 1) - self.conv2 = nn.Conv2d(64, outchannels, 3, 1, 1) - if inchannels != outchannels: - self.sc = nn.Conv2d(inchannels, outchannels, 1) - - def forward(self, inputs): - x = F.relu(inputs) - x = self.conv1(x) - x = F.relu(x) - x = self.conv2(x) - if hasattr(self, 'sc'): - sc = self.sc(inputs) - else: - sc = inputs - return x + sc - - -class MotionCompensation(nn.Module): - def __init__(self, channel, gain=32): - super(MotionCompensation, self).__init__() - self.gain = gain - in_c = channel * 2 - # Coarse Flow - conv1 = nn.Sequential(nn.Conv2d(in_c, 24, 5, 2, 2), nn.ReLU(True)) - conv2 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) - conv3 = nn.Sequential(nn.Conv2d(24, 24, 5, 2, 2), nn.ReLU(True)) - conv4 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) - conv5 = nn.Sequential(nn.Conv2d(24, 32, 3, 1, 1), nn.Tanh()) - up1 = nn.PixelShuffle(4) - self.coarse_flow = nn.Sequential(conv1, conv2, conv3, conv4, conv5, up1) - # Fine Flow - in_c = channel * 3 + 2 - conv1 = nn.Sequential(nn.Conv2d(in_c, 24, 5, 2, 2), nn.ReLU(True)) - conv2 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) - conv3 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) - conv4 = nn.Sequential(nn.Conv2d(24, 24, 3, 1, 1), nn.ReLU(True)) - conv5 = nn.Sequential(nn.Conv2d(24, 8, 3, 1, 1), nn.Tanh()) - up2 = nn.PixelShuffle(2) - self.fine_flow = nn.Sequential(conv1, conv2, conv3, conv4, conv5, up2) - self.warp1 = STN(padding_mode='border') - self.warp2 = STN(padding_mode='border') - - def forward(self, target, ref): - flow0 = self.coarse_flow(torch.cat([ref, target], 1)) - flow0 *= self.gain - w0 = self.warp1(ref, flow0[:, 0], flow0[:, 1]) - flow1 = self.fine_flow(torch.cat([ref, target, flow0, w0], 1)) - flow1 *= self.gain - flow1 += flow0 - w1 = self.warp2(ref, flow1[:, 0], flow1[:, 1]) - return w1, flow1 - - -class SRNet(nn.Module): - def __init__(self, scale, channel, depth): - super(SRNet, self).__init__() - self.entry = nn.Conv2d(channel * depth, 64, 3, 1, 1) - self.exit = nn.Conv2d(64, channel, 3, 1, 1) - self.body = nn.Sequential(RB(64, 64), RB(64, 64), RB(64, 64), nn.ReLU(True)) - self.conv = nn.Conv2d(64, 64 * scale ** 2, 3, 1, 1) - self.up = nn.PixelShuffle(scale) - - def forward(self, inputs): - x = self.entry(inputs) - y = self.body(x) + x - y = self.conv(y) - y = self.up(y) - y = self.exit(y) - return y - - -class VESPCN(nn.Module): - def __init__(self, scale, channel, depth): - super(VESPCN, self).__init__() - self.sr = SRNet(scale, channel, depth) - self.mc = MotionCompensation(channel) - self.depth = depth - - def forward(self, *inputs): - center = self.depth // 2 - target = inputs[center] - refs = inputs[:center] + inputs[center + 1:] - warps = [] - flows = [] - for r in refs: - warp, flow = self.mc(target, r) - warps.append(warp) - flows.append(flow) - warps.append(target) - x = torch.cat(warps, 1) - sr = self.sr(x) - return sr, warps[:-1], flows diff --git a/VSR/Backend/Torch/Models/video/__init__.py b/VSR/Backend/Torch/Models/video/__init__.py deleted file mode 100644 index 7d064ec..0000000 --- a/VSR/Backend/Torch/Models/video/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. 
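Aside on the removed VESPCN above: it chains MotionCompensation and SRNet, warping every reference frame onto the center frame and super-resolving the concatenated warps in one shot. A hypothetical smoke test, assuming the deleted classes were still importable (shapes are my choice):

import torch

net = VESPCN(scale=4, channel=3, depth=3)
# three consecutive RGB frames: t-1, t, t+1
frames = [torch.randn(1, 3, 32, 32) for _ in range(3)]
sr, warps, flows = net(*frames)
print(sr.shape)                # torch.Size([1, 3, 128, 128])
print(len(warps), len(flows))  # 2 warped references, 2 flow fields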
-# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/3 下午5:10 - -import logging - -_logger = logging.getLogger("VSR.VIDEO") diff --git a/VSR/Backend/Torch/Models/video/motion.py b/VSR/Backend/Torch/Models/video/motion.py deleted file mode 100644 index d9e1478..0000000 --- a/VSR/Backend/Torch/Models/video/motion.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c): Wenyi Tang 2017-2019. -# Author: Wenyi Tang -# Email: wenyi.tang@intel.com -# Update Date: 2019/4/3 下午5:10 - -import torch -from torch import nn -from torch.nn import functional as F - -from VSR.Util.Math import nd_meshgrid -from ...Util.Utility import irtranspose, transpose - - -class STN(nn.Module): - """Spatial transformer network. - For optical flow based frame warping. - - Args: - mode: sampling interpolation mode of `grid_sample` - padding_mode: can be `zeros` | `borders` - normalized: flow value is normalized to [-1, 1] or absolute value - """ - - def __init__(self, mode='bilinear', padding_mode='zeros', normalize=False): - super(STN, self).__init__() - self.mode = mode - self.padding_mode = padding_mode - self.norm = normalize - - def forward(self, inputs, u, v): - batch = inputs.size(0) - device = inputs.device - mesh = nd_meshgrid(*inputs.shape[-2:], permute=[1, 0]) - mesh = torch.tensor(mesh, dtype=torch.float32, device=device) - mesh = mesh.unsqueeze(0).repeat_interleave(batch, dim=0) - # add flow to mesh - _u, _v = u, v - if not self.norm: - # flow needs to normalize to [-1, 1] - h, w = inputs.shape[-2:] - _u = u / w * 2 - _v = v / h * 2 - flow = torch.stack([_u, _v], dim=-1) - assert flow.shape == mesh.shape, \ - f"Shape mis-match: {flow.shape} != {mesh.shape}" - mesh = mesh + flow - return F.grid_sample(inputs, mesh, - mode=self.mode, padding_mode=self.padding_mode) - - -class STTN(nn.Module): - """Spatio-temporal transformer network. (ECCV 2018) - - Args: - transpose_ncthw: how input tensor be transposed to format NCTHW - mode: sampling interpolation mode of `grid_sample` - padding_mode: can be `zeros` | `borders` - normalize: flow value is normalized to [-1, 1] or absolute value - """ - - def __init__(self, transpose_ncthw=(0, 1, 2, 3, 4), - normalize=False, mode='bilinear', padding_mode='zeros'): - super(STTN, self).__init__() - self.normalized = normalize - self.mode = mode - self.padding_mode = padding_mode - self.t = transpose_ncthw - - def forward(self, inputs, d, u, v): - _error_msg = "STTN only works for 5D tensor but got {}D input!" 
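# [aside] The rest of this deleted forward() mirrors STN.forward: it adds the
# [-1, 1]-normalized flow (u, v), plus a temporal offset d, to an identity
# sampling mesh, warps with F.grid_sample, and keeps only the first output
# frame.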
- if inputs.dim() != 5: - raise ValueError(_error_msg.format(inputs.dim())) - device = inputs.device - batch, channel, t, h, w = (inputs.shape[i] for i in self.t) - mesh = nd_meshgrid(t, h, w, permute=[2, 1, 0]) - mesh = torch.tensor(mesh, dtype=torch.float32, device=device) - mesh = mesh.unsqueeze(0).repeat_interleave(batch, dim=0) - _d, _u, _v = d, u, v - if not self.normalized: - _d = d / t * 2 - _u = u / w * 2 - _v = v / h * 2 - st_flow = torch.stack([_u, _v, _d], dim=-1) - st_flow = st_flow.unsqueeze(1).repeat_interleave(t, dim=1) - assert st_flow.shape == mesh.shape, \ - f"Shape mis-match: {st_flow.shape} != {mesh.shape}" - mesh = mesh + st_flow - inputs = transpose(inputs, self.t) - warp = F.grid_sample(inputs, mesh, mode=self.mode, - padding_mode=self.padding_mode) - # STTN warps into a single frame - warp = warp[:, :, 0:1] - return irtranspose(warp, self.t) diff --git a/VSR/Backend/Torch/Util/Distortion.py b/VSR/Backend/Torch/Util/Distortion.py new file mode 100644 index 0000000..15a45b7 --- /dev/null +++ b/VSR/Backend/Torch/Util/Distortion.py @@ -0,0 +1,140 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 9 + +import random + +import torch +import torch.nn as nn +import torchvision.transforms.functional as F +from torchvision.transforms import Compose, Lambda, ToPILImage, ToTensor + +from VSR.Util.Math import gaussian_kernel +from ..Util.Utility import gaussian_noise, imfilter, poisson_noise + + +class Distortion(nn.Module): + """Randomly change the brightness, contrast and saturation of an image. + + Args: + brightness (float or tuple of float (min, max)): How much to jitter brightness. + brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness] + or the given [min, max]. Should be non negative numbers. + contrast (float or tuple of float (min, max)): How much to jitter contrast. + contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast] + or the given [min, max]. Should be non negative numbers. + saturation (float or tuple of float (min, max)): How much to jitter saturation. + saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation] + or the given [min, max]. Should be non negative numbers. + hue (float or tuple of float (min, max)): How much to jitter hue. + hue_factor is chosen uniformly from [-hue, hue] or the given [min, max]. + Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. 
+ """ + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, + gaussian_noise_std=0, poisson_noise_std=0, gaussian_blur_std=0): + super(Distortion, self).__init__() + self.brightness = self._check_input(brightness, 'brightness') + self.contrast = self._check_input(contrast, 'contrast') + self.saturation = self._check_input(saturation, 'saturation') + self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5), + clip_first_on_zero=False) + self.awgn = self._check_input(gaussian_noise_std, 'awgn', center=0, + bound=(0, 0.75), clip_first_on_zero=True) + self.poisson = None + self.blur = self._check_input(gaussian_blur_std, 'blur', center=0) + self.blur_padding = nn.ReflectionPad2d(7) + + def _check_input(self, value, name, center=1, bound=(0, float('inf')), + clip_first_on_zero=True): + if isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError("{} values should be between {}".format(name, bound)) + else: + if value < 0: + raise ValueError( + "If {} is a single number, it must be non negative.".format(name)) + value = [center - value, center + value] + if clip_first_on_zero: + value[0] = max(value[0], 0) + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def get_params(brightness, contrast, saturation, hue, awgn, poisson, blur): + """Get a randomized transform to be applied on image. + + Arguments are same as that of __init__. + + Returns: + Transform which randomly adjusts brightness, contrast and + saturation in a random order. + """ + transforms = [] + + brightness_factor = 0 + if brightness is not None: + brightness_factor = random.uniform(brightness[0], brightness[1]) + transforms.append( + Lambda(lambda img: F.adjust_brightness(img, brightness_factor))) + contrast_factor = 0 + if contrast is not None: + contrast_factor = random.uniform(contrast[0], contrast[1]) + transforms.append( + Lambda(lambda img: F.adjust_contrast(img, contrast_factor))) + saturation_factor = 0 + if saturation is not None: + saturation_factor = random.uniform(saturation[0], saturation[1]) + transforms.append( + Lambda(lambda img: F.adjust_saturation(img, saturation_factor))) + hue_factor = 0 + if hue is not None: + hue_factor = random.uniform(hue[0], hue[1]) + transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor))) + + random.shuffle(transforms) + transform = Compose([ + ToPILImage('RGB'), + *transforms, + ToTensor() + ]) + factors = [ + brightness_factor, contrast_factor, saturation_factor, hue_factor + ] + return transform, factors + + def forward(self, x): + img = [x_[0].cpu() for x_ in torch.split(x, 1, dim=0)] + factors = [] + for i in range(len(img)): + # color jitter + transform, fac = self.get_params(self.brightness, self.contrast, + self.saturation, self.hue, self.awgn, + self.poisson, self.blur) + img[i] = transform(img[i]) + # noise & blur + blur_factor = 0 + if self.blur is not None: + blur_factor = random.uniform(*self.blur) + img[i] = imfilter( + img[i], + torch.tensor(gaussian_kernel(15, blur_factor), + device=img[i].device), + self.blur_padding)[0] + awgn_factor = 0 + if self.awgn is not None: + awgn_factor = random.uniform(*self.awgn) + img[i] += gaussian_noise(img[i], stddev=awgn_factor, channel_wise=False) + poisson_factor = 0 + if self.poisson is not None: + poisson_factor = random.uniform(*self.poisson) + img[i] += poisson_noise(img[i], 
stddev=poisson_factor) + fac += [awgn_factor, poisson_factor, blur_factor] + factors.append(torch.tensor(fac)) + img[i] = img[i].clamp(0, 1) + return torch.stack(img).to(x.device), torch.stack(factors).to(x.device) diff --git a/VSR/Backend/Torch/Util/Utility.py b/VSR/Backend/Torch/Util/Utility.py index fd5cf3f..243dff0 100644 --- a/VSR/Backend/Torch/Util/Utility.py +++ b/VSR/Backend/Torch/Util/Utility.py @@ -161,11 +161,13 @@ def bicubic_resize(img, scale, border='reflect'): raise ValueError("Wrong scale factor!") -def imfilter(image: torch.Tensor, kernel: torch.Tensor): +def imfilter(image: torch.Tensor, kernel: torch.Tensor, padding=None): with torch.no_grad(): if image.dim() == 3: image = image.unsqueeze(0) assert image.dim() == 4, f"Dim of image must be 4, but is {image.dim()}" + if kernel.dtype != image.dtype: + kernel = kernel.to(dtype=image.dtype) if kernel.dim() == 2: kernel = kernel.unsqueeze(0) kernel = torch.cat([kernel] * image.shape[0]) @@ -182,28 +184,36 @@ def imfilter(image: torch.Tensor, kernel: torch.Tensor): t[j] = _k _m.append(torch.cat(t, dim=1)) _k = torch.cat(_m, dim=0) - ret.append(F.conv2d(i, _k, padding=[x // 2 for x in kernel.shape[1:]])) + if padding is None: + ret.append(F.conv2d(i, _k, padding=[x // 2 for x in kernel.shape[1:]])) + elif callable(padding): + ret.append(F.conv2d(padding(i), _k)) + else: + raise ValueError("Wrong padding value!") return torch.cat(ret) -def poisson_noise(inputs, stddev=None, sigma_max=0.16): +def poisson_noise(inputs: torch.Tensor, stddev=None, sigma_max=0.16): """Add poisson noise to inputs.""" if stddev is None: - stddev = np.random.rand(inputs.shape[-1]) * sigma_max - stddev = np.reshape(stddev, [1] * (inputs.ndim - 1) + [-1]) + stddev = torch.rand(inputs.shape[-1]) * sigma_max + stddev = torch.tensor(stddev, device=inputs.device) + stddev = stddev.reshape([1] * (inputs.ndim - 1) + [-1]) sigma_map = (1 - inputs) * stddev - return np.random.randn(*inputs.shape) * sigma_map + return torch.randn_like(inputs) * sigma_map -def gaussian_noise(inputs, stddev=None, sigma_max=0.06, channel_wise=True): +def gaussian_noise(inputs: torch.Tensor, stddev=None, sigma_max=0.06, + channel_wise=True): """Add channel wise gaussian noise.""" channel = inputs.shape[-1] if channel_wise else 1 if stddev is None: - stddev = np.random.rand(channel) * sigma_max - stddev = np.reshape(stddev, [1] * (inputs.ndim - 1) + [-1]) - noise_map = np.random.randn(*inputs.shape) * stddev + stddev = torch.rand(channel) * sigma_max + stddev = torch.tensor(stddev, device=inputs.device) + stddev = stddev.reshape([1] * (inputs.ndim - 1) + [-1]) + noise_map = torch.randn_like(inputs) * stddev return noise_map diff --git a/VSR/Backend/Torch/__init__.py b/VSR/Backend/Torch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/VSR/Util/Math.py b/VSR/Util/Math.py index 54e42db..741e0cb 100644 --- a/VSR/Util/Math.py +++ b/VSR/Util/Math.py @@ -157,3 +157,16 @@ def camera_response_function(inputs, crf_table, max_val=1): for i in inputs_index.flatten(): ret.append(crf_table[i]) return np.reshape(ret, inputs.shape) + + +def gen_pca_mat(dim=15, kernel_size=15, samples=10000): + kernels = [] + for i in range(samples): + theta = np.random.uniform(0, np.pi) + l1 = np.random.uniform(0.1, 10) + l2 = np.random.uniform(0.1, l1) + kernels.append(anisotropic_gaussian_kernel(kernel_size, theta, l1, l2)) + kernels = np.stack(kernels).reshape([samples, -1]).transpose() + mat_c = np.matmul(kernels, kernels.transpose()) + _, mat_v = np.linalg.eigh(mat_c, 'U') + return mat_v[..., 
-dim:].transpose() diff --git a/VSR/Backend/Torch/Models/srmd/pca.py b/VSR/Util/PcaPrecompute.py similarity index 99% rename from VSR/Backend/Torch/Models/srmd/pca.py rename to VSR/Util/PcaPrecompute.py index 6e9f4d9..16a0d2d 100644 --- a/VSR/Backend/Torch/Models/srmd/pca.py +++ b/VSR/Util/PcaPrecompute.py @@ -1,11 +1,10 @@ # Copyright (c) 2017-2020 Wenyi Tang. # Author: Wenyi Tang # Email: wenyitang@outlook.com -# Update: 2020 - 2 - 12 +# Update: 2020 - 6 - 15 -import numpy as np -from VSR.Util.Math import anisotropic_gaussian_kernel +import numpy as np # Pre-calculated PCA array _PCA = np.array( @@ -3387,18 +3386,6 @@ dtype=np.float ) -def gen_pca_mat(dim=15,kernel_size=15, samples=10000): - kernels = [] - for i in range(samples): - theta = np.random.uniform(0, np.pi) - l1 = np.random.uniform(0.1, 10) - l2 = np.random.uniform(0.1, l1) - kernels.append(anisotropic_gaussian_kernel(kernel_size, theta, l1, l2)) - kernels = np.stack(kernels).reshape([samples, -1]).transpose() - mat_c = np.matmul(kernels, kernels.transpose()) - _, mat_v = np.linalg.eigh(mat_c, 'U') - return mat_v[..., -dim:].transpose() - def get_degradation(kernel: np.ndarray): ret = np.matmul(_PCA, kernel.reshape([-1, 1])) diff --git a/prepare_data.py b/prepare_data.py index a8eeab9..25251e8 100644 --- a/prepare_data.py +++ b/prepare_data.py @@ -68,17 +68,6 @@ 'drsr_v2.zip': '1UrVNE6QMcQTW9Ks4P__JrRClb4IGTMYp', 'drsr_sc2.zip': '1xIRVG7jbTM9fcLQkwyGyJIjwF2rTbNEJ', 'drsr_sc4.zip': '1W-222rR2D2o-E99B4cXuUPBz2aCLuY_Z', - # GAN weights - 'gangp.zip': '1UHiSLjaU5Yeiltl9cQsR3-EKta3yt0dI', - 'lsgan.zip': '15dsubMpvTeCoSCIfPCcKjhnk7UMyuljt', - 'ragan.zip': '1HWR2m3cFH-Fze1zkioj20ugDXRmjGQEH', - 'ragangp.zip': '1lf3Rj3Lk1qISbQiIQiSJt03DVV5pp5Ml', - 'ralsgan.zip': '180qrnH8_MdFvLlSl5MSP8sQCPLbbevsr', - 'rgan.zip': '1ZwCB1Fa9UIybOq1SfgOeBKJ8g63KMYEK', - 'rgangp.zip': '1QSBVscdfJvf_dMRRiBA_lCq39gX9mDZJ', - 'rlsgan.zip': '1siDKxGvlb0p2E2_EmAJoT8knFMuQRivj', - 'sgan.zip': '1spClB26QJNQEio_DktobQq9ALT-PHfg3', - 'wgangp.zip': '1jyngiCyU1Js4DH5yUhug4gTPy2bQoETO', # PyTorch weights (Prefix "T") 'Tsrcnn.zip': 'https://github.com/LoSealL/Model/releases/download/srcnn/Tsrcnn.zip', 'Tespcn.zip': 'https://github.com/LoSealL/Model/releases/download/espcn/Tespcn.zip', @@ -88,13 +77,11 @@ 'Tsofvsr.zip': 'https://github.com/LoSealL/Model/releases/download/sofvsr/SOFVSR_x4.zip', 'Tcarn.zip': 'https://github.com/LoSealL/Model/releases/download/carn/tcarn.zip', 'Tesrgan.zip': 'https://github.com/LoSealL/Model/releases/download/esrgan/esrgan.zip', - 'Ttecogan.zip': 'https://github.com/LoSealL/Model/releases/download/tecogan/tecogan.zip', 'Tfrvsr.zip': 'https://github.com/LoSealL/Model/releases/download/frvsr/FRVSR.zip', 'Tmldn.zip': 'https://github.com/LoSealL/Model/releases/download/mldn/drn.zip', 'Tcrdn.zip': 'https://github.com/LoSealL/Model/releases/download/crdn/rsr.zip', 'Trbpn.zip': '1Ozp5j-DBWJSpXY5GvxiEPKdfCaAbOXqu', 'Tspmc.zip': 'https://github.com/LoSealL/Model/releases/download/spmc/spmc.zip', - 'Tvespcn.zip': 'https://github.com/LoSealL/Model/releases/download/vespcn/Tvespcn.zip', 'Tsrmd.zip': '1ORKH05-aLSbQaWB4qQulIm2INoRufuD_', 'Tdbpn.zip': '1PbhtuMz1zF3-d16dthurJ0xIQ9uyMvkz' } From 45223379093af6267b652d9bcc8c4a78eafd1dd8 Mon Sep 17 00:00:00 2001 From: Wenyi Tang Date: Wed, 17 Jun 2020 12:23:52 +0800 Subject: [PATCH 05/12] Set verbose to debug to log into file --- CHANGELOG.md | 1 + Tests/training_test.py | 4 +- VSR/Backend/Keras/Framework/Environment.py | 11 +- VSR/Backend/Keras/Framework/Trainer.py | 57 ++------ 
VSR/Backend/TF/Framework/Trainer.py | 23 ++--
 VSR/Backend/Torch/Framework/Environment.py | 11 +-
 VSR/Backend/Torch/Framework/Trainer.py | 63 +++------
 VSR/Backend/Torch/Models/Ops/Distortion.py | 147 +++++++--------------
 VSR/Backend/Torch/Models/Optim/SISR.py | 4 +-
 VSR/Backend/Torch/Models/Srmd.py | 9 +-
 VSR/Backend/Torch/Util/Distortion.py | 140 --------------------
 VSR/Backend/Torch/Util/Utility.py | 21 ++-
 VSR/Util/Ensemble.py | 38 ++++++
 13 files changed, 168 insertions(+), 361 deletions(-)
 delete mode 100644 VSR/Backend/Torch/Util/Distortion.py
 create mode 100644 VSR/Util/Ensemble.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1ac9d84..f6dd14b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
 ## 2020-06
 - Update TF backend
 - Add support to tensorflow 2.0 (both legacy and eager mode)
+- Refactor torch backend models

 ## 1.0.5
 ## 2020-05
diff --git a/Tests/training_test.py b/Tests/training_test.py
index da6d651..d683ff4 100644
--- a/Tests/training_test.py
+++ b/Tests/training_test.py
@@ -11,7 +11,8 @@
 os.chdir('Tests')

 _WORKDIR = r"/tmp/vsr/utest/"
-_TCMD = r"python train.py {} --data_config=../Tests/data/fake_datasets.yml --dataset=normal --epochs=1 --steps=1 --save_dir={}"
+_TCMD = ("python train.py {} --data_config=../Tests/data/fake_datasets.yml"
+        " --dataset=normal --epochs=1 --steps=1 --save_dir={} --val_steps=1")
 _ECMD = r"python eval.py {} --save_dir={} --ensemble -t=../Tests/data/set5_x2"


@@ -43,6 +44,7 @@ def test_other_models():
         'sofvsr', 'vespcn', 'frvsr', 'qprn',
         'ufvsr', 'yovsr', 'tecogan', 'spmc', 'rbpn'
     ):
+      # skip video model
       continue
     train(k)
     eval(k)
diff --git a/VSR/Backend/Keras/Framework/Environment.py b/VSR/Backend/Keras/Framework/Environment.py
index 5ea5a3b..7325536 100644
--- a/VSR/Backend/Keras/Framework/Environment.py
+++ b/VSR/Backend/Keras/Framework/Environment.py
@@ -9,7 +9,7 @@
 import numpy as np
 import tensorflow as tf

-LOG = logging.getLogger('VSR.Framework')
+LOG = logging.getLogger('VSR.Framework.Keras')


 def _parse_ckpt_name(name):
@@ -50,9 +50,12 @@ def _startup(self):
                                 checkpoint_name=self.model.name)
     if isinstance(self._logd, Path):
       self._logd.mkdir(parents=True, exist_ok=True)
-      if LOG.isEnabledFor(logging.DEBUG):
-        hdl = logging.FileHandler(self._logd / 'training.txt')
-        LOG.addHandler(hdl)
+      _logger = logging.getLogger('VSR')
+      if _logger.isEnabledFor(logging.DEBUG):
+        fd = logging.FileHandler(self._logd / 'vsr_debug.log', encoding='utf-8')
+        fd.setFormatter(
+            logging.Formatter("[%(asctime)s][%(levelname)s] %(message)s"))
+        _logger.addHandler(fd)

   def _close(self):
     """TODO anything to close?"""
diff --git a/VSR/Backend/Keras/Framework/Trainer.py b/VSR/Backend/Keras/Framework/Trainer.py
index ad6542e..0e65e05 100644
--- a/VSR/Backend/Keras/Framework/Trainer.py
+++ b/VSR/Backend/Keras/Framework/Trainer.py
@@ -4,45 +4,16 @@
 # Update: 2020 - 5 - 30

 import logging
-import time

 import numpy as np
 import tensorflow as tf
 import tqdm

 from VSR.Util.Config import Config
+from VSR.Util.Ensemble import Ensembler
 from .Environment import Env

-LOG = logging.getLogger('VSR.Framework')
-
-
-def _ensemble_expand(feature):
-  r0 = feature
-  r1 = np.rot90(feature, 1, axes=[-3, -2])
-  r2 = np.rot90(feature, 2, axes=[-3, -2])
-  r3 = np.rot90(feature, 3, axes=[-3, -2])
-  r4 = np.flip(feature, axis=-2)
-  r5 = np.rot90(r4, 1, axes=[-3, -2])
-  r6 = np.rot90(r4, 2, axes=[-3, -2])
-  r7 = np.rot90(r4, 3, axes=[-3, -2])
-  return r0, r1, r2, r3, r4, r5, r6, r7
-
-
-def _ensemble_reduce_mean(outputs):
-  results = []
-  for i in outputs:
-    outputs_ensemble = [
i[0], - np.rot90(i[1], 3, axes=[-3, -2]), - np.rot90(i[2], 2, axes=[-3, -2]), - np.rot90(i[3], 1, axes=[-3, -2]), - np.flip(i[4], axis=-2), - np.flip(np.rot90(i[5], 3, axes=[-3, -2]), axis=-2), - np.flip(np.rot90(i[6], 2, axes=[-3, -2]), axis=-2), - np.flip(np.rot90(i[7], 1, axes=[-3, -2]), axis=-2), - ] - results.append(np.concatenate(outputs_ensemble).mean(axis=0, keepdims=True)) - return results +LOG = logging.getLogger('VSR.Framework.Keras') def to_tensor(x): @@ -80,7 +51,7 @@ def fit_init(self) -> bool: LOG.info(f'Found pre-trained epoch {v.epoch}>=target {v.epochs},' ' quit fitting.') return False - LOG.info('Fitting: {}'.format(self.model.name.upper())) + LOG.info(f'Fitting: {self.model.name.upper()}') if self._logd: v.writer = tf.summary.create_file_writer(str(self._logd), name=self.model.name) @@ -106,12 +77,10 @@ def fit(self, loaders, config, **kwargs): shuffle=True, memory_limit=mem) v.train_loader.prefetch(shuffle=True, memory_usage=mem) - date = time.strftime('%Y-%m-%d %T', time.localtime()) v.avg_meas = {} if v.lr_schedule and callable(v.lr_schedule): v.lr = v.lr_schedule(steps=v.epoch) - print('| {} | Epoch: {}/{} | LR: {:.2g} |'.format( - date, v.epoch, v.epochs, v.lr)) + LOG.info(f"| Epoch: {v.epoch}/{v.epochs} | LR: {v.lr:.2g} |") with tqdm.tqdm(train_iter, unit='batch', ascii=True) as r: self.model.to_train() for items in r: @@ -120,7 +89,7 @@ def fit(self, loaders, config, **kwargs): for _k, _v in v.avg_meas.items(): _v = np.mean(_v) tf.summary.scalar(_k, _v, step=v.epoch, description='train') - print('| Epoch average {} = {:.6f} |'.format(_k, _v)) + LOG.info(f"| Epoch average {_k} = {_v:.6f} |") if v.epoch % v.validate_every_n_epoch == 0 and v.val_loader: # Hard-coded memory limitation for validating self.benchmark(v.val_loader, v, memory_limit='1GB') @@ -156,11 +125,13 @@ def benchmark(self, loader, config, **kwargs): self.model.to_eval() for items in tqdm.tqdm(it, 'Test', ascii=True): self.fn_benchmark_each_step(items) + log_message = str() for _k, _v in v.mean_metrics.items(): _v = np.mean(_v) tf.summary.scalar(_k, _v, step=v.epoch, description='eval') - print('{}: {:.6f}'.format(_k, _v), end=', ') - print('') + log_message += f"{_k}: {_v:.6f}, " + log_message = log_message[:-2] + "." 
+ LOG.info(log_message) def fn_benchmark_each_step(self, pack): v = self.v @@ -189,11 +160,9 @@ def infer(self, loader, config, **kwargs): self._restore(config.epoch) it = loader.make_one_shot_iterator([1, -1, -1, -1], -1) if hasattr(it, '__len__'): - if len(it): - LOG.info('Inferring {} at epoch {}'.format( - self.model.name, self.last_epoch)) - else: + if len(it) == 0: return + LOG.info(f"Inferring {self.model.name} at epoch {self.last_epoch}") # use original images in inferring self.model.to_eval() for items in tqdm.tqdm(it, 'Infer', ascii=True): @@ -203,7 +172,7 @@ def fn_infer_each_step(self, pack): v = self.v if v.ensemble: # add self-ensemble boosting metric score - feature_ensemble = _ensemble_expand(pack['lr']) + feature_ensemble = Ensembler.expand(pack['lr']) outputs_ensemble = [] for f in feature_ensemble: f = to_tensor(f) @@ -213,7 +182,7 @@ def fn_infer_each_step(self, pack): outputs = [] for i in range(len(outputs_ensemble[0])): outputs.append([j[i] for j in outputs_ensemble]) - outputs = _ensemble_reduce_mean(outputs) + outputs = Ensembler.merge(outputs) else: feature = to_tensor(pack['lr']) outputs, _ = self.model.eval([feature]) diff --git a/VSR/Backend/TF/Framework/Trainer.py b/VSR/Backend/TF/Framework/Trainer.py index a0a4fc6..19d126b 100644 --- a/VSR/Backend/TF/Framework/Trainer.py +++ b/VSR/Backend/TF/Framework/Trainer.py @@ -9,7 +9,6 @@ """ import logging -import time from pathlib import Path import numpy as np @@ -91,9 +90,12 @@ def _startup(self): self._saved.mkdir(parents=True, exist_ok=True) if isinstance(self._logd, Path): self._logd.mkdir(parents=True, exist_ok=True) - if LOG.isEnabledFor(logging.DEBUG): - hdl = logging.FileHandler(self._logd / 'training.txt') - LOG.addHandler(hdl) + _logger = logging.getLogger('VSR') + if _logger.isEnabledFor(logging.DEBUG): + fd = logging.FileHandler(self._logd / 'vsr_debug.log', encoding='utf-8') + fd.setFormatter( + logging.Formatter("[%(asctime)s][%(levelname)s] %(message)s")) + _logger.addHandler(fd) if self.model.compiled: self.graph = tf.get_default_graph() else: @@ -260,18 +262,16 @@ def fn_train_each_epoch(self): shuffle=True, memory_limit=mem) v.train_loader.prefetch(v.memory_limit) - date = time.strftime('%Y-%m-%d %T', time.localtime()) v.avg_meas = {} if v.lr_schedule and callable(v.lr_schedule): v.lr = v.lr_schedule(steps=v.global_step) - print('| {} | Epoch: {}/{} | LR: {:.2g} |'.format( - date, v.epoch, v.epochs, v.lr)) + LOG.info(f"| Epoch: {v.epoch}/{v.epochs} | LR: {v.lr:.2g} |") with tqdm.tqdm(train_iter, unit='batch', ascii=True) as r: for items in r: self.fn_train_each_step(items) r.set_postfix(v.loss) for _k, _v in v.avg_meas.items(): - print('| Epoch average {} = {:.6f} |'.format(_k, np.mean(_v))) + LOG.info(f"| Epoch average {_k} = {np.mean(_v):.6f} |") if v.epoch % v.validate_every_n_epoch == 0 and v.val_loader: self.benchmark(v.val_loader, v, epoch=v.epoch, memory_limit='1GB') v.summary_writer.add_summary(self.model.summary(), v.global_step) @@ -386,6 +386,9 @@ def benchmark(self, loader, config, **kwargs): v.mean_metrics = {} v.loader = loader self.fn_benchmark_body() + log_message = str() for _k, _v in v.mean_metrics.items(): - print('{}: {:.6f}'.format(_k, np.mean(_v)), end=', ') - print('') + _v = np.mean(_v) + log_message += f"{_k}: {_v:.6f}, " + log_message = log_message[:-2] + "." 
+ LOG.info(log_message) diff --git a/VSR/Backend/Torch/Framework/Environment.py b/VSR/Backend/Torch/Framework/Environment.py index cf1ccc2..43350b6 100644 --- a/VSR/Backend/Torch/Framework/Environment.py +++ b/VSR/Backend/Torch/Framework/Environment.py @@ -9,7 +9,7 @@ import numpy as np import torch -LOG = logging.getLogger('VSR.Framework') +LOG = logging.getLogger('VSR.Framework.Torch') def _make_ckpt_name(name, step): @@ -52,9 +52,12 @@ def _startup(self): self._saved.mkdir(parents=True, exist_ok=True) if isinstance(self._logd, Path): self._logd.mkdir(parents=True, exist_ok=True) - if LOG.isEnabledFor(logging.DEBUG): - hdl = logging.FileHandler(self._logd / 'training.txt') - LOG.addHandler(hdl) + _logger = logging.getLogger('VSR') + if _logger.isEnabledFor(logging.DEBUG): + fd = logging.FileHandler(self._logd / 'vsr_debug.log', encoding='utf-8') + fd.setFormatter( + logging.Formatter("[%(asctime)s][%(levelname)s] %(message)s")) + _logger.addHandler(fd) def _close(self): """TODO anything to close?""" diff --git a/VSR/Backend/Torch/Framework/Trainer.py b/VSR/Backend/Torch/Framework/Trainer.py index 7d70dc0..55e92d8 100644 --- a/VSR/Backend/Torch/Framework/Trainer.py +++ b/VSR/Backend/Torch/Framework/Trainer.py @@ -4,46 +4,17 @@ # Update: 2020 - 2 - 7 import logging -import time import numpy as np import torch import tqdm from VSR.Util.Config import Config +from VSR.Util.Ensemble import Ensembler from .Environment import Env from .Summary import Summarizer -LOG = logging.getLogger('VSR.Framework') - - -def _ensemble_expand(feature): - r0 = feature - r1 = np.rot90(feature, 1, axes=[-3, -2]) - r2 = np.rot90(feature, 2, axes=[-3, -2]) - r3 = np.rot90(feature, 3, axes=[-3, -2]) - r4 = np.flip(feature, axis=-2) - r5 = np.rot90(r4, 1, axes=[-3, -2]) - r6 = np.rot90(r4, 2, axes=[-3, -2]) - r7 = np.rot90(r4, 3, axes=[-3, -2]) - return r0, r1, r2, r3, r4, r5, r6, r7 - - -def _ensemble_reduce_mean(outputs): - results = [] - for i in outputs: - outputs_ensemble = [ - i[0], - np.rot90(i[1], 3, axes=[-3, -2]), - np.rot90(i[2], 2, axes=[-3, -2]), - np.rot90(i[3], 1, axes=[-3, -2]), - np.flip(i[4], axis=-2), - np.flip(np.rot90(i[5], 3, axes=[-3, -2]), axis=-2), - np.flip(np.rot90(i[6], 2, axes=[-3, -2]), axis=-2), - np.flip(np.rot90(i[7], 1, axes=[-3, -2]), axis=-2), - ] - results.append(np.concatenate(outputs_ensemble).mean(axis=0, keepdims=True)) - return results +LOG = logging.getLogger('VSR.Framework.Torch') def to_tensor(x, cuda=False): @@ -85,7 +56,7 @@ def fit_init(self) -> bool: LOG.info(f'Found pre-trained epoch {v.epoch}>=target {v.epochs},' ' quit fitting.') return False - LOG.info('Fitting: {}'.format(self.model.name.upper())) + LOG.info(f'Fitting: {self.model.name.upper()}') if self._logd: v.writer = Summarizer(str(self._logd), self.model.name) return True @@ -109,12 +80,10 @@ def fit(self, loaders, config, **kwargs): shuffle=True, memory_limit=mem) v.train_loader.prefetch(shuffle=True, memory_usage=mem) - date = time.strftime('%Y-%m-%d %T', time.localtime()) v.avg_meas = {} if v.lr_schedule and callable(v.lr_schedule): v.lr = v.lr_schedule(steps=v.epoch) - print('| {} | Epoch: {}/{} | LR: {:.2g} |'.format( - date, v.epoch, v.epochs, v.lr)) + LOG.info(f"| Epoch: {v.epoch}/{v.epochs} | LR: {v.lr:.2g} |") with tqdm.tqdm(train_iter, unit='batch', ascii=True) as r: self.model.to_train() for items in r: @@ -124,7 +93,7 @@ def fit(self, loaders, config, **kwargs): _v = np.mean(_v) if isinstance(self.v.writer, Summarizer): v.writer.scalar(_k, _v, step=v.epoch, collection='train') - print('| Epoch 
average {} = {:.6f} |'.format(_k, _v)) + LOG.info(f"| Epoch average {_k} = {_v:.6f} |") if v.epoch % v.validate_every_n_epoch == 0 and v.val_loader: # Hard-coded memory limitation for validating self.benchmark(v.val_loader, v, memory_limit='1GB') @@ -161,12 +130,14 @@ def benchmark(self, loader, config, **kwargs): for items in tqdm.tqdm(it, 'Test', ascii=True): with torch.no_grad(): self.fn_benchmark_each_step(items) + log_message = str() for _k, _v in v.mean_metrics.items(): _v = np.mean(_v) if isinstance(self.v.writer, Summarizer): v.writer.scalar(_k, _v, step=v.epoch, collection='eval') - print('{}: {:.6f}'.format(_k, _v), end=', ') - print('') + log_message += f"{_k}: {_v:.6f}, " + log_message = log_message[:-2] + "." + LOG.info(log_message) def fn_benchmark_each_step(self, pack): v = self.v @@ -181,7 +152,8 @@ def fn_benchmark_each_step(self, pack): outputs = [from_tensor(x) for x in outputs] for fn in v.inference_results_hooks: outputs = fn(outputs, names=pack['name']) - if outputs is None: break + if outputs is None: + break def infer(self, loader, config, **kwargs): """Infer SR images. @@ -195,11 +167,9 @@ def infer(self, loader, config, **kwargs): self._restore(config.epoch, v.map_location) it = loader.make_one_shot_iterator([1, -1, -1, -1], -1) if hasattr(it, '__len__'): - if len(it): - LOG.info('Inferring {} at epoch {}'.format( - self.model.name, self.last_epoch)) - else: + if len(it) == 0: return + LOG.info(f"Inferring {self.model.name} at epoch {self.last_epoch}") # use original images in inferring self.model.to_eval() for items in tqdm.tqdm(it, 'Infer', ascii=True): @@ -211,7 +181,7 @@ def fn_infer_each_step(self, pack): with torch.set_grad_enabled(False): if v.ensemble: # add self-ensemble boosting metric score - feature_ensemble = _ensemble_expand(pack['lr']) + feature_ensemble = Ensembler.expand(pack['lr']) outputs_ensemble = [] for f in feature_ensemble: f = to_tensor(f, v.cuda) @@ -221,11 +191,12 @@ def fn_infer_each_step(self, pack): outputs = [] for i in range(len(outputs_ensemble[0])): outputs.append([j[i] for j in outputs_ensemble]) - outputs = _ensemble_reduce_mean(outputs) + outputs = Ensembler.merge(outputs) else: feature = to_tensor(pack['lr'], v.cuda) outputs, _ = self.model.eval([feature]) outputs = [from_tensor(x) for x in outputs] for fn in v.inference_results_hooks: outputs = fn(outputs, names=pack['name']) - if outputs is None: break + if outputs is None: + break diff --git a/VSR/Backend/Torch/Models/Ops/Distortion.py b/VSR/Backend/Torch/Models/Ops/Distortion.py index 6d731e5..74b0056 100644 --- a/VSR/Backend/Torch/Models/Ops/Distortion.py +++ b/VSR/Backend/Torch/Models/Ops/Distortion.py @@ -7,42 +7,36 @@ import torch import torch.nn as nn -import torchvision.transforms.functional as F -from torchvision.transforms import Compose, Lambda, ToPILImage, ToTensor from VSR.Util.Math import gaussian_kernel from ...Util.Utility import gaussian_noise, imfilter, poisson_noise -class Distortion(nn.Module): - """Randomly change the brightness, contrast and saturation of an image. +class Distorter(nn.Module): + """Randomly add the noise and blur of an image. Args: - brightness (float or tuple of float (min, max)): How much to jitter brightness. - brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness] - or the given [min, max]. Should be non negative numbers. - contrast (float or tuple of float (min, max)): How much to jitter contrast. - contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast] - or the given [min, max]. 
Should be non negative numbers.
-    saturation (float or tuple of float (min, max)): How much to jitter saturation.
-      saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
-      or the given [min, max]. Should be non negative numbers.
-    hue (float or tuple of float (min, max)): How much to jitter hue.
-      hue_factor is chosen uniformly from [-hue, hue] or the given [min, max].
-      Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
+    gaussian_noise_std (float or tuple of float (min, max)): How much
+      additive gaussian white noise to add. The std is chosen uniformly
+      from [0, std] or the given [min, max]. Should be non negative numbers.
+    poisson_noise_std (float or tuple of float (min, max)): How much
+      poisson noise to add. The std is chosen uniformly from [0, std] or
+      the given [min, max]. Should be non negative numbers.
+    gaussian_blur_std (float or tuple of float (min, max)): The std of the
+      gaussian blur kernel, chosen uniformly from [0, std] or the given
+      [min, max]. Should be non negative numbers.
   """

-  def __init__(self, brightness=0, contrast=0, saturation=0, hue=0,
-               gaussian_noise_std=0, poisson_noise_std=0, gaussian_blur_std=0):
-    super(Distortion, self).__init__()
-    self.brightness = self._check_input(brightness, 'brightness')
-    self.contrast = self._check_input(contrast, 'contrast')
-    self.saturation = self._check_input(saturation, 'saturation')
-    self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5),
-                                 clip_first_on_zero=False)
+  def __init__(self,
+               gaussian_noise_std=0,
+               poisson_noise_std=0,
+               gaussian_blur_std=0):
+    super(Distorter, self).__init__()
     self.awgn = self._check_input(gaussian_noise_std, 'awgn', center=0,
-                                  bound=(0, 0.75), clip_first_on_zero=True)
-    self.poisson = None
+                                  bound=(0, 75 / 255), clip_first_on_zero=True)
+    self.poisson = self._check_input(poisson_noise_std, 'poisson', center=0,
+                                     bound=(0, 50 / 255),
+                                     clip_first_on_zero=True)
     self.blur = self._check_input(gaussian_blur_std, 'blur', center=0)
     self.blur_padding = nn.ReflectionPad2d(7)

@@ -58,83 +52,38 @@ def _check_input(self, value, name, center=1, bound=(0, float('inf')),
                    clip_first_on_zero=True):
     if isinstance(value, (tuple, list)) and len(value) == 2:
       if not bound[0] <= value[0] <= value[1] <= bound[1]:
         raise ValueError("{} values should be between {}".format(name, bound))
     else:
       if value < 0:
         raise ValueError(
             "If {} is a single number, it must be non negative.".format(name))
       value = [center - value, center + value]
       if clip_first_on_zero:
         value[0] = max(value[0], 0)
-
     # if value is 0 or (1., 1.) for brightness/contrast/saturation
     # or (0., 0.) for hue, do nothing
     if value[0] == value[1] == center:
       value = None
     return value

-  @staticmethod
-  def get_params(brightness, contrast, saturation, hue, awgn, poisson, blur):
-    """Get a randomized transform to be applied on image.
-
-    Arguments are same as that of __init__.
-
-    Returns:
-      Transform which randomly adjusts brightness, contrast and
-      saturation in a random order.
- """ - transforms = [] - - brightness_factor = 0 - if brightness is not None: - brightness_factor = random.uniform(brightness[0], brightness[1]) - transforms.append( - Lambda(lambda img: F.adjust_brightness(img, brightness_factor))) - contrast_factor = 0 - if contrast is not None: - contrast_factor = random.uniform(contrast[0], contrast[1]) - transforms.append( - Lambda(lambda img: F.adjust_contrast(img, contrast_factor))) - saturation_factor = 0 - if saturation is not None: - saturation_factor = random.uniform(saturation[0], saturation[1]) - transforms.append( - Lambda(lambda img: F.adjust_saturation(img, saturation_factor))) - hue_factor = 0 - if hue is not None: - hue_factor = random.uniform(hue[0], hue[1]) - transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor))) - - random.shuffle(transforms) - transform = Compose([ - ToPILImage('RGB'), - *transforms, - ToTensor() - ]) - factors = [ - brightness_factor, contrast_factor, saturation_factor, hue_factor - ] - return transform, factors - - def forward(self, x): - img = [x_[0].cpu() for x_ in torch.split(x, 1, dim=0)] + def forward(self, img): factors = [] - for i in range(len(img)): - # color jitter - transform, fac = self.get_params(self.brightness, self.contrast, - self.saturation, self.hue, self.awgn, - self.poisson, self.blur) - img[i] = transform(img[i]) - # noise & blur - blur_factor = 0 - if self.blur is not None: - blur_factor = random.uniform(*self.blur) - img[i] = imfilter( - img[i], - torch.tensor(gaussian_kernel(15, blur_factor), - device=img[i].device), - self.blur_padding)[0] - awgn_factor = 0 - if self.awgn is not None: - awgn_factor = random.uniform(*self.awgn) - img[i] += gaussian_noise(img[i], stddev=awgn_factor, channel_wise=False) - poisson_factor = 0 - if self.poisson is not None: - poisson_factor = random.uniform(*self.poisson) - img[i] += poisson_noise(img[i], stddev=poisson_factor) - fac += [awgn_factor, poisson_factor, blur_factor] - factors.append(torch.tensor(fac)) - img[i] = img[i].clamp(0, 1) - return torch.stack(img).to(x.device), torch.stack(factors).to(x.device) + # noise & blur + blur_factor = 0 + if self.blur is not None: + blur_factor = random.uniform(*self.blur) + img = imfilter( + img, + torch.tensor(gaussian_kernel(15, blur_factor), + device=img.device), + self.blur_padding) + awgn_factor = (0, 0, 0) + if self.awgn is not None: + _r = random.uniform(*self.awgn) + _g = random.uniform(*self.awgn) + _b = random.uniform(*self.awgn) + img += gaussian_noise(img, stddev=(_r, _g, _b)) + awgn_factor = (_r, _g, _b) + poisson_factor = (_r, _g, _b) + if self.poisson is not None: + _r = random.uniform(*self.poisson) + _g = random.uniform(*self.poisson) + _b = random.uniform(*self.poisson) + img += poisson_noise(img, stddev=(_r, _g, _b)) + poisson_factor = (_r, _g, _b) + fac = [blur_factor, *awgn_factor, *poisson_factor] + factors.append(torch.tensor(fac)) + img = img.clamp(0, 1) + return img, torch.stack(factors).to(img.device) diff --git a/VSR/Backend/Torch/Models/Optim/SISR.py b/VSR/Backend/Torch/Models/Optim/SISR.py index 03a930a..527dabf 100644 --- a/VSR/Backend/Torch/Models/Optim/SISR.py +++ b/VSR/Backend/Torch/Models/Optim/SISR.py @@ -140,7 +140,7 @@ class PerceptualOptimizer(L1Optimizer): """ def __init__(self, scale, channel, image_weight=1, feature_weight=0, - gan_weight=0, patch_size=128, **kwargs): + gan_weight=0, **kwargs): super(PerceptualOptimizer, self).__init__(scale, channel, **kwargs) self.use_vgg = feature_weight > 0 self.use_gan = gan_weight > 0 @@ -172,7 +172,7 @@ def train(self, 
inputs, labels, learning_rate=None): image_loss = self.pixel_cri(sr, labels[0]) loss = image_loss * self.w[0] log = { - 'image': image_loss.detach().cpu().numpy() + 'image_loss': image_loss.detach().cpu().numpy() } if self.use_vgg: self.feature[0].eval() diff --git a/VSR/Backend/Torch/Models/Srmd.py b/VSR/Backend/Torch/Models/Srmd.py index a24fa41..c3fa77e 100644 --- a/VSR/Backend/Torch/Models/Srmd.py +++ b/VSR/Backend/Torch/Models/Srmd.py @@ -49,22 +49,23 @@ def forward(self, x, kernel=None, noise=None): class SRMD(PerceptualOptimizer): - def __init__(self, scale, channel, degradation=None, **kwargs): + def __init__(self, scale, channel, degradation=None, layers=12, filters=128, + pca_length=15, **kwargs): degradation = degradation or {} noise = degradation.get('noise', 0) if noise > 1: noise /= 255 assert 0 <= noise <= 1 self.pca_dim = kwargs.get('pca_dim', 15) - self.kernel_size = degradation.get('kernel_size', 15) + self.kernel_size = degradation.get('kernel_size', pca_length) self.ktype = degradation.get('kernel_type', 'isotropic') self.l1 = degradation.get('l1', 0.1) self.l2 = degradation.get('l2', 0.1) self.theta = degradation.get('theta', 0.1) self.noise = noise self.blur_padding = torch.nn.ReflectionPad2d(7) - self.srmd = Net(scale=scale, channels=channel, **kwargs) - super(SRMD, self).__init__(scale, channel) + self.srmd = Net(scale, channel, layers, filters, pca_length) + super(SRMD, self).__init__(scale, channel, **kwargs) def gen_kernel(self, ktype, ksize, l1, l2=None, theta=0): if ktype == 'isotropic': diff --git a/VSR/Backend/Torch/Util/Distortion.py b/VSR/Backend/Torch/Util/Distortion.py deleted file mode 100644 index 15a45b7..0000000 --- a/VSR/Backend/Torch/Util/Distortion.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) 2017-2020 Wenyi Tang. -# Author: Wenyi Tang -# Email: wenyitang@outlook.com -# Update: 2020 - 6 - 9 - -import random - -import torch -import torch.nn as nn -import torchvision.transforms.functional as F -from torchvision.transforms import Compose, Lambda, ToPILImage, ToTensor - -from VSR.Util.Math import gaussian_kernel -from ..Util.Utility import gaussian_noise, imfilter, poisson_noise - - -class Distortion(nn.Module): - """Randomly change the brightness, contrast and saturation of an image. - - Args: - brightness (float or tuple of float (min, max)): How much to jitter brightness. - brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness] - or the given [min, max]. Should be non negative numbers. - contrast (float or tuple of float (min, max)): How much to jitter contrast. - contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast] - or the given [min, max]. Should be non negative numbers. - saturation (float or tuple of float (min, max)): How much to jitter saturation. - saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation] - or the given [min, max]. Should be non negative numbers. - hue (float or tuple of float (min, max)): How much to jitter hue. - hue_factor is chosen uniformly from [-hue, hue] or the given [min, max]. - Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. 
- """ - - def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, - gaussian_noise_std=0, poisson_noise_std=0, gaussian_blur_std=0): - super(Distortion, self).__init__() - self.brightness = self._check_input(brightness, 'brightness') - self.contrast = self._check_input(contrast, 'contrast') - self.saturation = self._check_input(saturation, 'saturation') - self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5), - clip_first_on_zero=False) - self.awgn = self._check_input(gaussian_noise_std, 'awgn', center=0, - bound=(0, 0.75), clip_first_on_zero=True) - self.poisson = None - self.blur = self._check_input(gaussian_blur_std, 'blur', center=0) - self.blur_padding = nn.ReflectionPad2d(7) - - def _check_input(self, value, name, center=1, bound=(0, float('inf')), - clip_first_on_zero=True): - if isinstance(value, (tuple, list)) and len(value) == 2: - if not bound[0] <= value[0] <= value[1] <= bound[1]: - raise ValueError("{} values should be between {}".format(name, bound)) - else: - if value < 0: - raise ValueError( - "If {} is a single number, it must be non negative.".format(name)) - value = [center - value, center + value] - if clip_first_on_zero: - value[0] = max(value[0], 0) - - # if value is 0 or (1., 1.) for brightness/contrast/saturation - # or (0., 0.) for hue, do nothing - if value[0] == value[1] == center: - value = None - return value - - @staticmethod - def get_params(brightness, contrast, saturation, hue, awgn, poisson, blur): - """Get a randomized transform to be applied on image. - - Arguments are same as that of __init__. - - Returns: - Transform which randomly adjusts brightness, contrast and - saturation in a random order. - """ - transforms = [] - - brightness_factor = 0 - if brightness is not None: - brightness_factor = random.uniform(brightness[0], brightness[1]) - transforms.append( - Lambda(lambda img: F.adjust_brightness(img, brightness_factor))) - contrast_factor = 0 - if contrast is not None: - contrast_factor = random.uniform(contrast[0], contrast[1]) - transforms.append( - Lambda(lambda img: F.adjust_contrast(img, contrast_factor))) - saturation_factor = 0 - if saturation is not None: - saturation_factor = random.uniform(saturation[0], saturation[1]) - transforms.append( - Lambda(lambda img: F.adjust_saturation(img, saturation_factor))) - hue_factor = 0 - if hue is not None: - hue_factor = random.uniform(hue[0], hue[1]) - transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor))) - - random.shuffle(transforms) - transform = Compose([ - ToPILImage('RGB'), - *transforms, - ToTensor() - ]) - factors = [ - brightness_factor, contrast_factor, saturation_factor, hue_factor - ] - return transform, factors - - def forward(self, x): - img = [x_[0].cpu() for x_ in torch.split(x, 1, dim=0)] - factors = [] - for i in range(len(img)): - # color jitter - transform, fac = self.get_params(self.brightness, self.contrast, - self.saturation, self.hue, self.awgn, - self.poisson, self.blur) - img[i] = transform(img[i]) - # noise & blur - blur_factor = 0 - if self.blur is not None: - blur_factor = random.uniform(*self.blur) - img[i] = imfilter( - img[i], - torch.tensor(gaussian_kernel(15, blur_factor), - device=img[i].device), - self.blur_padding)[0] - awgn_factor = 0 - if self.awgn is not None: - awgn_factor = random.uniform(*self.awgn) - img[i] += gaussian_noise(img[i], stddev=awgn_factor, channel_wise=False) - poisson_factor = 0 - if self.poisson is not None: - poisson_factor = random.uniform(*self.poisson) - img[i] += poisson_noise(img[i], 
stddev=poisson_factor) - fac += [awgn_factor, poisson_factor, blur_factor] - factors.append(torch.tensor(fac)) - img[i] = img[i].clamp(0, 1) - return torch.stack(img).to(x.device), torch.stack(factors).to(x.device) diff --git a/VSR/Backend/Torch/Util/Utility.py b/VSR/Backend/Torch/Util/Utility.py index 243dff0..96cfb3e 100644 --- a/VSR/Backend/Torch/Util/Utility.py +++ b/VSR/Backend/Torch/Util/Utility.py @@ -6,6 +6,7 @@ import torch import torch.nn.functional as F +from VSR.Backend import DATA_FORMAT from VSR.Util.Math import weights_downsample, weights_upsample @@ -193,26 +194,32 @@ def imfilter(image: torch.Tensor, kernel: torch.Tensor, padding=None): return torch.cat(ret) -def poisson_noise(inputs: torch.Tensor, stddev=None, sigma_max=0.16): +def poisson_noise(inputs: torch.Tensor, stddev=None, sigma_max=0.16, + channel_wise=1): """Add poisson noise to inputs.""" if stddev is None: - stddev = torch.rand(inputs.shape[-1]) * sigma_max + stddev = torch.rand(channel_wise) * sigma_max stddev = torch.tensor(stddev, device=inputs.device) - stddev = stddev.reshape([1] * (inputs.ndim - 1) + [-1]) + if DATA_FORMAT == 'channels_first': + stddev = stddev.reshape([1, -1] + [1] * (inputs.ndim - 2)) + else: + stddev = stddev.reshape([1] * (inputs.ndim - 1) + [-1]) sigma_map = (1 - inputs) * stddev return torch.randn_like(inputs) * sigma_map def gaussian_noise(inputs: torch.Tensor, stddev=None, sigma_max=0.06, - channel_wise=True): + channel_wise=1): """Add channel wise gaussian noise.""" - channel = inputs.shape[-1] if channel_wise else 1 if stddev is None: - stddev = torch.rand(channel) * sigma_max + stddev = torch.rand(channel_wise) * sigma_max stddev = torch.tensor(stddev, device=inputs.device) - stddev = stddev.reshape([1] * (inputs.ndim - 1) + [-1]) + if DATA_FORMAT == 'channels_first': + stddev = stddev.reshape([1, -1] + [1] * (inputs.ndim - 2)) + else: + stddev = stddev.reshape([1] * (inputs.ndim - 1) + [-1]) noise_map = torch.randn_like(inputs) * stddev return noise_map diff --git a/VSR/Util/Ensemble.py b/VSR/Util/Ensemble.py new file mode 100644 index 0000000..11fb80f --- /dev/null +++ b/VSR/Util/Ensemble.py @@ -0,0 +1,38 @@ +# Copyright (c) 2017-2020 Wenyi Tang. 
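Aside: the new VSR/Util/Ensemble.py below centralizes the 8-way self-ensemble that the Keras and Torch trainers previously duplicated. A handy sanity check (my sketch, not a repo test): merge undoes the rotations and flips of expand, so the round trip through an identity model reproduces the input exactly:

import numpy as np

from VSR.Util.Ensemble import Ensembler

x = np.random.rand(1, 8, 8, 3)  # NHWC; H == W, so rot90 preserves the shape
variants = Ensembler.expand(x)  # 8 rotated/flipped copies
restored = Ensembler.merge([variants])[0]
assert np.allclose(restored, x)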
+# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 17 + +import numpy as np + + +class Ensembler: + @staticmethod + def expand(feature: np.ndarray): + r0 = feature.copy() + r1 = np.rot90(feature, 1, axes=[-3, -2]) + r2 = np.rot90(feature, 2, axes=[-3, -2]) + r3 = np.rot90(feature, 3, axes=[-3, -2]) + r4 = np.flip(feature, axis=-2) + r5 = np.rot90(r4, 1, axes=[-3, -2]) + r6 = np.rot90(r4, 2, axes=[-3, -2]) + r7 = np.rot90(r4, 3, axes=[-3, -2]) + return r0, r1, r2, r3, r4, r5, r6, r7 + + @staticmethod + def merge(outputs: [np.ndarray]): + results = [] + for i in outputs: + outputs_ensemble = [ + i[0], + np.rot90(i[1], 3, axes=[-3, -2]), + np.rot90(i[2], 2, axes=[-3, -2]), + np.rot90(i[3], 1, axes=[-3, -2]), + np.flip(i[4], axis=-2), + np.flip(np.rot90(i[5], 3, axes=[-3, -2]), axis=-2), + np.flip(np.rot90(i[6], 2, axes=[-3, -2]), axis=-2), + np.flip(np.rot90(i[7], 1, axes=[-3, -2]), axis=-2), + ] + results.append( + np.concatenate(outputs_ensemble).mean(axis=0, keepdims=True)) + return results From ede0b7278dbdde26fe348b06d1a27c8197df7298 Mon Sep 17 00:00:00 2001 From: Wenyi Tang Date: Thu, 18 Jun 2020 17:03:05 +0800 Subject: [PATCH 06/12] In pytorch backend, abstract common optimizer for SISR task --- Train/par/pytorch/carn.yml | 1 + Train/par/pytorch/cubic.yml | 5 + Train/train.py | 3 + VSR/Backend/Keras/Models/Model.py | 3 + VSR/Backend/TF/Framework/SuperResolution.py | 3 + VSR/Backend/Torch/Models/Bicubic.py | 51 +++++++++ VSR/Backend/Torch/Models/Carn.py | 46 ++------ VSR/Backend/Torch/Models/Crdn.py | 33 +----- VSR/Backend/Torch/Models/Dbpn.py | 40 ++----- VSR/Backend/Torch/Models/Edsr.py | 78 ++++--------- VSR/Backend/Torch/Models/Esrgan.py | 106 ++++-------------- VSR/Backend/Torch/Models/Model.py | 11 +- VSR/Backend/Torch/Models/Msrn.py | 30 +---- VSR/Backend/Torch/Models/Ops/Blocks.py | 60 ++++++++-- VSR/Backend/Torch/Models/Ops/Discriminator.py | 79 +++++++++---- VSR/Backend/Torch/Models/Optim/SISR.py | 17 ++- VSR/Backend/Torch/Models/Qprn.py | 2 +- VSR/Backend/Torch/Models/Rcan.py | 32 ++---- VSR/Backend/Torch/Models/SRFeat.py | 4 +- VSR/Backend/Torch/Models/Srmd.py | 7 +- VSR/Backend/Torch/Models/__init__.py | 4 + VSR/DataLoader/Loader.py | 2 +- prepare_data.py | 103 ++++++++++------- 23 files changed, 351 insertions(+), 369 deletions(-) create mode 100644 Train/par/pytorch/cubic.yml create mode 100644 VSR/Backend/Torch/Models/Bicubic.py diff --git a/Train/par/pytorch/carn.yml b/Train/par/pytorch/carn.yml index 4bd6c31..bf4794f 100644 --- a/Train/par/pytorch/carn.yml +++ b/Train/par/pytorch/carn.yml @@ -3,6 +3,7 @@ carn: channel: 3 multi_scale: 1 # change to 1 if use official pth file group: 1 + clip: 10 batch_shape: [16, 3, 64, 64] lr: 1.0e-4 diff --git a/Train/par/pytorch/cubic.yml b/Train/par/pytorch/cubic.yml new file mode 100644 index 0000000..58ac9ba --- /dev/null +++ b/Train/par/pytorch/cubic.yml @@ -0,0 +1,5 @@ +cubic: + scale: 4 + channel: 3 + +batch_shape: [16, 3, 32, 32] diff --git a/Train/train.py b/Train/train.py index 8cb9957..4063d06 100644 --- a/Train/train.py +++ b/Train/train.py @@ -33,6 +33,7 @@ g3.add_argument("--pretrain", help="specify the pre-trained model checkpoint or will search into `save_dir` if not specified") g3.add_argument("--export", help="export ONNX (torch backend) or protobuf (tf backend) (needs support from model)") g3.add_argument("-c", "--comment", default=None, help="extend a comment string after saving folder") +g3.add_argument("--distributed", action="store_true") def main(): @@ -62,6 +63,8 @@ def main(): 
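Aside, before the train.py hunk below: the Bicubic baseline added above has no trainable weights, since Cubic round-trips every image through PIL's bicubic resize; it mainly serves as a PSNR floor for debugging. A hypothetical smoke test (batch shape is my choice; the model is presumably registered through the new cubic.yml):

import torch

model = BICUBIC(scale=4, channel=3)
lr = torch.rand(2, 3, 16, 16)  # NCHW batch in [0, 1]
hr = torch.rand(2, 3, 64, 64)
outputs, metrics = model.eval([lr], labels=[hr])
print(outputs[0].shape)  # (2, 3, 64, 64) numpy array
print(metrics)           # {'psnr': ...}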
model.cuda() if opt.pretrain: model.load(opt.pretrain) + if opt.distributed: + model.distributed() root = f'{opt.save_dir}/{opt.model}' if opt.comment: root += '_' + opt.comment diff --git a/VSR/Backend/Keras/Models/Model.py b/VSR/Backend/Keras/Models/Model.py index f698113..c5aace6 100644 --- a/VSR/Backend/Keras/Models/Model.py +++ b/VSR/Backend/Keras/Models/Model.py @@ -95,6 +95,9 @@ def cuda(self): """Move model to cuda device.""" pass + def distributed(self): + pass + def export(self, export_dir): """export keras model. diff --git a/VSR/Backend/TF/Framework/SuperResolution.py b/VSR/Backend/TF/Framework/SuperResolution.py index 312637a..843a786 100644 --- a/VSR/Backend/TF/Framework/SuperResolution.py +++ b/VSR/Backend/TF/Framework/SuperResolution.py @@ -87,6 +87,9 @@ def get_executor(self, root): def cuda(self): pass + def distributed(self): + pass + def load(self, ckpt): self.pre_ckpt = ckpt diff --git a/VSR/Backend/Torch/Models/Bicubic.py b/VSR/Backend/Torch/Models/Bicubic.py new file mode 100644 index 0000000..b8d43a1 --- /dev/null +++ b/VSR/Backend/Torch/Models/Bicubic.py @@ -0,0 +1,51 @@ +# Copyright (c) 2017-2020 Wenyi Tang. +# Author: Wenyi Tang +# Email: wenyitang@outlook.com +# Update: 2020 - 6 - 17 + +# Non-trainable bicubic, for performance benchmarking and debugging + +import torch +import torch.nn as nn +import torchvision as tv + +from .Model import SuperResolution +from ..Util.Metrics import psnr + + +class Cubic(nn.Module): + def __init__(self, scale): + super(Cubic, self).__init__() + self.to_pil = tv.transforms.ToPILImage() + self.to_tensor = tv.transforms.ToTensor() + self.scale = scale + + def forward(self, x): + ret = [] + for img in [i[0] for i in x.split(1, dim=0)]: + img = self.to_pil(img.cpu()) + w = img.width + h = img.height + img = img.resize([w * self.scale, h * self.scale], 3) + img = self.to_tensor(img) + ret.append(img) + return torch.stack(ret).to(x.device) + + +class BICUBIC(SuperResolution): + def __init__(self, scale=4, channel=3, **kwargs): + super(BICUBIC, self).__init__(scale, channel, **kwargs) + self.cubic = Cubic(scale) + self.cri = nn.L1Loss() + + def train(self, inputs, labels, learning_rate=None): + sr = self.cubic(inputs[0]) + loss = self.cri(sr, labels[0]) + return {'l1': loss.detach().cpu().numpy()} + + def eval(self, inputs, labels=None, **kwargs): + metrics = {} + sr = self.cubic(inputs[0]).cpu().detach() + if labels is not None: + metrics['psnr'] = psnr(sr.numpy(), labels[0].cpu().numpy()) + return [sr.numpy()], metrics diff --git a/VSR/Backend/Torch/Models/Carn.py b/VSR/Backend/Torch/Models/Carn.py index 489e0f4..e1fc7f0 100644 --- a/VSR/Backend/Torch/Models/Carn.py +++ b/VSR/Backend/Torch/Models/Carn.py @@ -9,10 +9,9 @@ import torch.nn as nn import torch.nn.functional as F -from .Model import SuperResolution from .Ops.Blocks import EasyConv2d, MeanShift, RB from .Ops.Scale import MultiscaleUpsample, Upsample -from ..Util import Metrics +from .Optim.SISR import L1Optimizer _logger = logging.getLogger("VSR.CARN") _logger.info("LICENSE: CARN is implemented by Namhyuk Ahn. 
" @@ -138,41 +137,10 @@ def forward(self, x, scale=None): return out -class CARN(SuperResolution): - def __init__(self, scale, channel, **kwargs): - super(CARN, self).__init__(scale, channel, **kwargs) - group = kwargs.get('group', 1) - ms = kwargs.get('multi_scale', 0) - self.clip = kwargs.get('clip', 10) +class CARN(L1Optimizer): + def __init__(self, scale, channel, group=1, ms=0, **kwargs): self.carn = Net(group=group, scale=scale, multi_scale=ms) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) - - def train(self, inputs, labels, learning_rate=None): - sr = self.carn(inputs[0], self.scale) - loss = F.l1_loss(sr, labels[0]) - if learning_rate: - for param_group in self.opt.param_groups: - param_group["lr"] = learning_rate - self.opt.zero_grad() - loss.backward() - torch.nn.utils.clip_grad_norm_(self.carn.parameters(), self.clip) - self.opt.step() - return {'l1': loss.detach().cpu().numpy()} - - def eval(self, inputs, labels=None, **kwargs): - metrics = {} - sr = self.carn(inputs[0], self.scale).cpu().detach() - if labels is not None: - metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy()) - return [sr.numpy()], metrics - - def export(self, export_dir): - """An example of how to export ONNX format""" - - # ONNX needs input placeholder to export model! - # Sounds stupid to set a 48x48 inputs. - - device = list(self.carn.parameters())[0].device - inputs = torch.randn(1, self.channel, 144, 128, device=device) - scale = torch.tensor(self.scale, device=device) - torch.onnx.export(self.carn, (inputs, scale), export_dir / 'carn.onnx') + super(CARN, self).__init__(scale, channel, **kwargs) + + def fn(self, x): + return self.carn(x) diff --git a/VSR/Backend/Torch/Models/Crdn.py b/VSR/Backend/Torch/Models/Crdn.py index cff32c8..be06202 100644 --- a/VSR/Backend/Torch/Models/Crdn.py +++ b/VSR/Backend/Torch/Models/Crdn.py @@ -8,10 +8,8 @@ import torch.nn.functional as F from VSR.Util.Utility import to_list -from . 
import Model from .Ops.Blocks import CascadeRdn -from ..Framework.Summary import get_writer -from ..Util import Metrics +from .Optim.SISR import L1Optimizer class Upsample(nn.Module): @@ -67,29 +65,10 @@ def forward(self, inputs): return out -class CRDN(Model.SuperResolution): - def __init__(self, **kwargs): - super(CRDN, self).__init__(scale=1, channel=3) +class CRDN(L1Optimizer): + def __init__(self, channel=3, scale=1, **kwargs): self.rsr = Crdn() - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) + super(CRDN, self).__init__(scale=scale, channel=channel, **kwargs) - def train(self, inputs, labels, learning_rate=None): - sr = self.rsr(inputs[0]) - loss = F.l1_loss(sr, labels[0]) - if learning_rate: - for param_group in self.opt.param_groups: - param_group["lr"] = learning_rate - self.opt.zero_grad() - loss.backward() - self.opt.step() - return {'l1': loss.detach().cpu().numpy()} - - def eval(self, inputs, labels=None, **kwargs): - metrics = {} - sr = self.rsr(inputs[0]).cpu().detach() - if labels is not None: - metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy()) - writer = get_writer(self.name) - if writer is not None: - writer.image('clean', sr) - return [sr.numpy()], metrics + def fn(self, x): + return self.rsr(x) diff --git a/VSR/Backend/Torch/Models/Dbpn.py b/VSR/Backend/Torch/Models/Dbpn.py index adfdf72..8055e35 100644 --- a/VSR/Backend/Torch/Models/Dbpn.py +++ b/VSR/Backend/Torch/Models/Dbpn.py @@ -7,11 +7,9 @@ import torch import torch.nn as nn -import torch.nn.functional as F -from .Model import SuperResolution from .Ops.Blocks import EasyConv2d -from ..Util import Metrics +from .Optim.SISR import L1Optimizer _logger = logging.getLogger("VSR.DBPN") _logger.info("LICENSE: DBPN is implemented by Haris. " @@ -143,31 +141,11 @@ def get_kernel_stride(scale): return 12, 8 -class DBPN(SuperResolution): - def __init__(self, channel, scale, **kwargs): - super(DBPN, self).__init__(scale, channel) - self.body = Dbpn(channel, scale, **kwargs) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) - - def train(self, inputs, labels, learning_rate=None): - sr = self.body(inputs[0]) - loss = F.l1_loss(sr, labels[0]) - if learning_rate: - for param_group in self.opt.param_groups: - param_group["lr"] = learning_rate - self.opt.zero_grad() - loss.backward() - self.opt.step() - return {'l1': loss.detach().cpu().numpy()} - - def eval(self, inputs, labels=None, **kwargs): - metrics = {} - sr = self.body(inputs[0]).cpu().detach() - if labels is not None: - metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy()) - return [sr.numpy()], metrics - - def export(self, export_dir): - device = list(self.body.parameters())[0].device - inputs = torch.randn(1, self.channel, 144, 128, device=device) - torch.onnx.export(self.body, (inputs,), export_dir / 'dbpn.onnx') +class DBPN(L1Optimizer): + def __init__(self, channel, scale, base_filter=64, feat=256, num_stages=7, + **kwargs): + self.body = Dbpn(channel, scale, base_filter, feat, num_stages) + super(DBPN, self).__init__(scale, channel, **kwargs) + + def fn(self, x): + return self.body(x) diff --git a/VSR/Backend/Torch/Models/Edsr.py b/VSR/Backend/Torch/Models/Edsr.py index b6390f6..db54790 100644 --- a/VSR/Backend/Torch/Models/Edsr.py +++ b/VSR/Backend/Torch/Models/Edsr.py @@ -6,14 +6,11 @@ import logging import random -import torch import torch.nn as nn -import torch.nn.functional as F -from .Model import SuperResolution from .Ops.Blocks import EasyConv2d, MeanShift, RB from .Ops.Scale import 
MultiscaleUpsample, Upsample
-from ..Util import Metrics
+from .Optim.SISR import L1Optimizer
 
 _logger = logging.getLogger("VSR.EDSR")
 _logger.info("LICENSE: EDSR is implemented by Bee Lim. "
@@ -91,57 +88,28 @@ def forward(self, x, scale):
     return x
 
 
-class EDSR(SuperResolution):
-  def __init__(self, scale, channel, rgb_range=255, **kwargs):
-    super(EDSR, self).__init__(scale, channel)
+class EDSR(L1Optimizer):
+  def __init__(self, scale, channel, n_resblocks=16, n_feats=64, rgb_range=255,
+               **kwargs):
     self.rgb_range = rgb_range
-    self.edsr = Edsr(scale, channel, rgb_range=rgb_range, **kwargs)
-    self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4)
-
-  def train(self, inputs, labels, learning_rate=None):
-    sr = self.edsr(inputs[0] * self.rgb_range) / self.rgb_range
-    loss = F.l1_loss(sr, labels[0])
-    if learning_rate:
-      for param_group in self.opt.param_groups:
-        param_group["lr"] = learning_rate
-    self.opt.zero_grad()
-    loss.backward()
-    self.opt.step()
-    return {'l1': loss.detach().cpu().numpy()}
-
-  def eval(self, inputs, labels=None, **kwargs):
-    metrics = {}
-    sr = self.edsr(inputs[0] * self.rgb_range) / self.rgb_range
-    sr = sr.cpu().detach()
-    if labels is not None:
-      metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy())
-    return [sr.numpy()], metrics
-
-
-class MSDR(SuperResolution):
-  def __init__(self, scale, channel, rgb_range=255, **kwargs):
-    super(MSDR, self).__init__(scale, channel)
+    self.edsr = Edsr(scale, channel, n_resblocks, n_feats, rgb_range)
+    super(EDSR, self).__init__(scale, channel, **kwargs)
+
+  def fn(self, x):
+    return self.edsr(x * self.rgb_range) / self.rgb_range
+
+
+class MSDR(L1Optimizer):
+  def __init__(self, scale, channel, n_resblocks=16, n_feats=64, rgb_range=255,
+               **kwargs):
     self.rgb_range = rgb_range
     self.scales = (2, 3, 4)
-    self.mdsr = Mdsr(self.scales, channel, rgb_range=rgb_range, **kwargs)
-    self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4)
-
-  def train(self, inputs, labels, learning_rate=None):
-    scale = self.scales[random.randint(0, 3)]
-    sr = self.mdsr(inputs[0] * self.rgb_range, scale) / self.rgb_range
-    loss = F.l1_loss(sr, labels[0])
-    if learning_rate:
-      for param_group in self.opt.param_groups:
-        param_group["lr"] = learning_rate
-    self.opt.zero_grad()
-    loss.backward()
-    self.opt.step()
-    return {'l1': loss.detach().cpu().numpy()}
-
-  def eval(self, inputs, labels=None, **kwargs):
-    metrics = {}
-    sr = self.mdsr(inputs[0] * self.rgb_range, self.scale) / self.rgb_range
-    sr = sr.cpu().detach()
-    if labels is not None:
-      metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy())
-    return [sr.numpy()], metrics
+    self.mdsr = Mdsr(self.scales, channel, n_resblocks, n_feats, rgb_range)
+    super(MSDR, self).__init__(scale, channel, **kwargs)
+
+  def fn(self, x):
+    if self.mdsr.training:
+      # random.choice cannot overrun the 3-entry tuple the way the
+      # inclusive random.randint(0, 3) could
+      scale = random.choice(self.scales)
+    else:
+      scale = self.scale
+    return self.mdsr(x * self.rgb_range, scale) / self.rgb_range
diff --git a/VSR/Backend/Torch/Models/Esrgan.py b/VSR/Backend/Torch/Models/Esrgan.py
index c4086be..794c372 100644
--- a/VSR/Backend/Torch/Models/Esrgan.py
+++ b/VSR/Backend/Torch/Models/Esrgan.py
@@ -6,17 +6,12 @@
 import logging
 
 import numpy as np
-import torch
 import torch.nn as nn
-import torch.nn.functional as F
 
-from .Model import SuperResolution
 from .Ops.Blocks import Activation, EasyConv2d, Rrdb
 from .Ops.Discriminator import DCGAN
-from .Ops.Loss import VggFeatureLoss, gan_bce_loss
 from .Ops.Scale import Upsample
-from ..Framework.Summary import get_writer
-from ..Util import 
Metrics +from .Optim.SISR import PerceptualOptimizer _logger = logging.getLogger("VSR.ESRGAN") _logger.info("LICENSE: ESRGAN is implemented by Xintao Wang. " @@ -28,12 +23,14 @@ def __init__(self, channel, scale, nf, nb, gc=32): super(RRDB_Net, self).__init__() self.head = EasyConv2d(channel, nf, kernel_size=3) rb_blocks = [ - Rrdb(nf, gc, 5, 0.2, kernel_size=3, activation=Activation('lrelu', 0.2)) + Rrdb(nf, gc, 5, 0.2, kernel_size=3, + activation=Activation('lrelu', negative_slope=0.2)) for _ in range(nb)] LR_conv = EasyConv2d(nf, nf, kernel_size=3) upsampler = [Upsample(nf, scale, 'nearest', - activation=Activation('lrelu', 0.2))] - HR_conv0 = EasyConv2d(nf, nf, kernel_size=3, activation='lrelu') + activation=Activation('lrelu', negative_slope=0.2))] + HR_conv0 = EasyConv2d(nf, nf, kernel_size=3, activation='lrelu', + negative_slope=0.2) HR_conv1 = EasyConv2d(nf, channel, kernel_size=3) self.body = nn.Sequential(*rb_blocks, LR_conv) self.tail = nn.Sequential(*upsampler, HR_conv0, HR_conv1) @@ -45,82 +42,21 @@ def forward(self, x): return x -class ESRGAN(SuperResolution): +class ESRGAN(PerceptualOptimizer): def __init__(self, channel, scale, patch_size=128, weights=(0.01, 1, 5e-3), nf=64, nb=23, gc=32, **kwargs): - super(ESRGAN, self).__init__(scale, channel) - self.use_vgg = weights[1] > 0 - self.use_gan = weights[2] > 0 - if self.use_gan: - self.dnet = DCGAN(3, np.log2(patch_size // 4) * 2, 'bn') - self.optd = torch.optim.Adam(self.trainable_variables('dnet'), 1e-4) self.rrdb = RRDB_Net(channel, scale, nf, nb, gc) - self.optg = torch.optim.Adam(self.trainable_variables('rrdb'), 1e-4) - if self.use_vgg: - self.vgg = [VggFeatureLoss(['block5_conv4'], True)] - # image, vgg, gan - self.w = weights - - def cuda(self): - super(ESRGAN, self).cuda() - if self.use_vgg: - self.vgg[0].cuda() - - def train(self, inputs, labels, learning_rate=None): - sr = self.rrdb(inputs[0]) - for opt in self.opts.values(): - if learning_rate: - for param_group in opt.param_groups: - param_group["lr"] = learning_rate - image_loss = F.l1_loss(sr, labels[0]) - loss = image_loss * self.w[0] - if self.use_vgg: - feature_loss = F.l1_loss(self.vgg[0](sr)[0], self.vgg[0](labels[0])[0]) - loss += feature_loss * self.w[1] - if self.use_gan: - # update G - self.optg.zero_grad() - fake = self.dnet(sr) - gan_loss_g = gan_bce_loss(fake, True) - loss += gan_loss_g * self.w[2] - loss.backward() - self.optg.step() - # update D - self.optd.zero_grad() - real = self.dnet(labels[0]) - fake = self.dnet(sr.detach()) - loss_d = gan_bce_loss(real, True) + gan_bce_loss(fake, False) - loss_d.backward() - self.optd.step() - return { - 'loss': loss.detach().cpu().numpy(), - 'image': image_loss.detach().cpu().numpy(), - 'loss_g': gan_loss_g.detach().cpu().numpy(), - 'loss_d': loss_d.detach().cpu().numpy() - } - else: - self.optg.zero_grad() - loss.backward() - self.optg.step() - return { - 'loss': loss.detach().cpu().numpy(), - 'image': image_loss.detach().cpu().numpy() - } - - def eval(self, inputs, labels=None, **kwargs): - metrics = {} - sr = self.rrdb(inputs[0]).cpu().detach() - if labels is not None: - metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy()) - writer = get_writer(self.name) - if writer is not None: - step = kwargs.get('epoch') - writer.image('sr', sr.clamp(0, 1), step=step) - writer.image('lr', inputs[0], step=step) - writer.image('hr', labels[0], step=step) - return [sr.numpy()], metrics - - def export(self, export_dir): - device = list(self.rrdb.parameters())[0].device - inputs = torch.randn(1, 
self.channel, 144, 128, device=device)
-    torch.onnx.export(self.rrdb, (inputs,), export_dir / 'rrdb.onnx')
+    super(ESRGAN, self).__init__(scale, channel,
+                                 discriminator=DCGAN,
+                                 discriminator_kwargs={
+                                   'channel': channel,
+                                   'scale': scale,
+                                   'num_layers': np.log2(patch_size // 4) * 2,
+                                   'norm': 'BN'
+                                 },
+                                 image_weight=weights[0],
+                                 feature_weight=weights[1],
+                                 gan_weight=weights[2], **kwargs)
+
+  def fn(self, x):
+    return self.rrdb(x)
diff --git a/VSR/Backend/Torch/Models/Model.py b/VSR/Backend/Torch/Models/Model.py
index a434e83..4c6c326 100644
--- a/VSR/Backend/Torch/Models/Model.py
+++ b/VSR/Backend/Torch/Models/Model.py
@@ -97,6 +97,15 @@ def cuda(self):
       if torch.cuda.is_available():
         self.modules[i] = self.modules[i].cuda()
 
+  def distributed(self):
+    # initialize a process group only when NCCL can actually back DDP;
+    # otherwise fall back to single-process DataParallel
+    if torch.distributed.is_available() and torch.distributed.is_nccl_available():
+      torch.distributed.init_process_group(backend='nccl', init_method="env://")
+      for i in self.modules:
+        self.modules[i] = torch.nn.parallel.DistributedDataParallel(self.modules[i])
+    else:
+      for i in self.modules:
+        self.modules[i] = torch.nn.DataParallel(self.modules[i])
+
   def export(self, export_dir):
     """export ONNX model.
 
@@ -159,7 +168,7 @@ class SuperResolution(BasicModel):
   """A default model for (video) super-resolution"""
 
   def __init__(self, scale, channel, **kwargs):
-    super(SuperResolution, self).__init__(**kwargs)
+    super(SuperResolution, self).__init__()
     self.scale = scale
     self.channel = channel
     # Default SR trainer
diff --git a/VSR/Backend/Torch/Models/Msrn.py b/VSR/Backend/Torch/Models/Msrn.py
index ecce3b7..fc5b602 100644
--- a/VSR/Backend/Torch/Models/Msrn.py
+++ b/VSR/Backend/Torch/Models/Msrn.py
@@ -9,10 +9,9 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
-from .Model import SuperResolution
 from .Ops.Blocks import EasyConv2d, MeanShift
 from .Ops.Scale import Upsample
-from ..Util import Metrics
+from .Optim.SISR import L1Optimizer
 
 _logger = logging.getLogger("VSR.MSRN")
 _logger.info("LICENSE: MSRN is implemented by Juncheng Li. 
" @@ -83,29 +82,12 @@ def forward(self, x): return x -class MSRN(SuperResolution): +class MSRN(L1Optimizer): def __init__(self, scale, channel, n_feats=64, n_blocks=8, rgb_range=255, **kwargs): - super(MSRN, self).__init__(scale, 3) self.rgb_range = rgb_range self.msrn = Msrn(channel, scale, n_feats, n_blocks, rgb_range) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) - - def train(self, inputs, labels, learning_rate=None): - sr = self.msrn(inputs[0] * self.rgb_range) / self.rgb_range - loss = F.l1_loss(sr, labels[0]) - if learning_rate: - for param_group in self.opt.param_groups: - param_group["lr"] = learning_rate - self.opt.zero_grad() - loss.backward() - self.opt.step() - return {'l1': loss.detach().cpu().numpy()} - - def eval(self, inputs, labels=None, **kwargs): - metrics = {} - sr = self.msrn(inputs[0] * self.rgb_range) / self.rgb_range - sr = sr.cpu().detach() - if labels is not None: - metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy()) - return [sr.numpy()], metrics + super(MSRN, self).__init__(scale, channel, **kwargs) + + def fn(self, x): + return self.msrn(x * self.rgb_range) / self.rgb_range diff --git a/VSR/Backend/Torch/Models/Ops/Blocks.py b/VSR/Backend/Torch/Models/Ops/Blocks.py index 2a1b970..3c69518 100644 --- a/VSR/Backend/Torch/Models/Ops/Blocks.py +++ b/VSR/Backend/Torch/Models/Ops/Blocks.py @@ -27,7 +27,7 @@ def __init__(self, mean_rgb, sub, rgb_range=1.0): class Activation(nn.Module): - def __init__(self, act, *args, **kwargs): + def __init__(self, act, **kwargs): super(Activation, self).__init__() if act is None: self.f = lambda t: t @@ -37,9 +37,11 @@ def __init__(self, act, *args, **kwargs): if self.name == 'relu': self.f = nn.ReLU(in_place) elif self.name == 'prelu': - self.f = nn.PReLU() + self.f = nn.PReLU(num_parameters=kwargs.get('num_parameters', 1), + init=kwargs.get('init', 0.25)) elif self.name in ('lrelu', 'leaky', 'leakyrelu'): - self.f = nn.LeakyReLU(*args, inplace=in_place) + self.f = nn.LeakyReLU(negative_slope=kwargs.get('negative_slope', 1e-2), + inplace=in_place) elif self.name == 'tanh': self.f = nn.Tanh() elif self.name == 'sigmoid': @@ -52,9 +54,42 @@ def forward(self, x): class EasyConv2d(nn.Module): + """ Convolution maker, to construct commonly used conv block with default + configurations. + + Support to build Conv2D, ConvTransposed2D, along with selectable normalization + and activations. + Support normalization: + - Batchnorm2D + - Spectralnorm2D + Support activation: + - Relu + - PRelu + - LeakyRelu + - Tanh + - Sigmoid + - Customized callable functions + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (str, optional): 'same' means $out_size=in_size // stride$ or + $out_size=in_size * stride$ (ConvTransposed); + 'valid' means padding zero. + dilation (int or tuple, optional): Spacing between kernel elements. Default: 1 + groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 + use_bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True`` + use_bn (bool, optional): If ``True``, adds Batchnorm2D module to the output. + use_sn (bool, optional): If ``True``, adds Spectralnorm2D module to the output. + transposed (bool, optional): If ``True``, use ConvTransposed instead of Conv2D. 
+ """ + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding='same', dilation=1, groups=1, activation=None, - use_bias=True, use_bn=False, use_sn=False, transposed=False): + use_bias=True, use_bn=False, use_sn=False, transposed=False, + **kwargs): super(EasyConv2d, self).__init__() padding = padding.lower() assert padding in ('same', 'valid') @@ -74,9 +109,14 @@ def __init__(self, in_channels, out_channels, kernel_size, stride=1, if use_sn: net[0] = nn.utils.spectral_norm(net[0]) if use_bn: - net += [nn.BatchNorm2d(out_channels)] + net += [nn.BatchNorm2d( + out_channels, + eps=kwargs.get('eps', 1e-5), + momentum=kwargs.get('momentum', 0.1), + affine=kwargs.get('affine', True), + track_running_stats=kwargs.get('track_running_stats', True))] if activation: - net += [Activation(activation, in_place=True)] + net += [Activation(activation, in_place=True, **kwargs)] self.body = nn.Sequential(*net) def forward(self, x): @@ -329,15 +369,15 @@ def __init__(self, in_channels, out_channels, kernel_size, stride=1, self.stride = stride self.padding = padding self.padding_h = tuple( - k // 2 for k, s, p, d in zip(kernel_size, stride, padding, dilation)) + k // 2 for k, s, p, d in zip(kernel_size, stride, padding, dilation)) self.dilation = dilation self.groups = groups self.weight_ih = Parameter( - torch.Tensor(4 * out_channels, in_channels // groups, *kernel_size)) + torch.Tensor(4 * out_channels, in_channels // groups, *kernel_size)) self.weight_hh = Parameter( - torch.Tensor(4 * out_channels, out_channels // groups, *kernel_size)) + torch.Tensor(4 * out_channels, out_channels // groups, *kernel_size)) self.weight_ch = Parameter( - torch.Tensor(3 * out_channels, out_channels // groups, *kernel_size)) + torch.Tensor(3 * out_channels, out_channels // groups, *kernel_size)) if bias: self.bias_ih = Parameter(torch.Tensor(4 * out_channels)) self.bias_hh = Parameter(torch.Tensor(4 * out_channels)) diff --git a/VSR/Backend/Torch/Models/Ops/Discriminator.py b/VSR/Backend/Torch/Models/Ops/Discriminator.py index 2daea31..5c8b80d 100644 --- a/VSR/Backend/Torch/Models/Ops/Discriminator.py +++ b/VSR/Backend/Torch/Models/Ops/Discriminator.py @@ -9,14 +9,19 @@ def _pull_conv_args(**kwargs): - f = kwargs.get('filters', 64) - ks = kwargs.get('kernel_size', 3) - activation = kwargs.get('activation', 'leaky') - bias = kwargs.get('bias', True) - norm = kwargs.get('norm', '') + def _get_and_pop(d: dict, key, default=None): + if key in d: + return d.pop(key) + return d.get(key, default) + + f = _get_and_pop(kwargs, 'filters', 64) + ks = _get_and_pop(kwargs, 'kernel_size', 3) + activation = _get_and_pop(kwargs, 'activation', 'leaky') + bias = _get_and_pop(kwargs, 'bias', True) + norm = _get_and_pop(kwargs, 'norm', '') bn = norm.lower() in ('bn', 'batch') sn = norm.lower() in ('sn', 'spectral') - return f, ks, activation, bias, bn, sn + return f, ks, activation, bias, bn, sn, kwargs class DCGAN(nn.Module): @@ -38,39 +43,40 @@ class DCGAN(nn.Module): favor 'A' and $n_{strided}=num_layers - 1$ in favor 'B'. 
""" - def __init__(self, channel, num_layers, norm=None, favor='A', **kwargs): + def __init__(self, channel, num_layers, scale=4, norm=None, favor='A', + **kwargs): super(DCGAN, self).__init__() - f, ks, act, bias, bn, sn = _pull_conv_args(norm=norm, **kwargs) + f, ks, act, bias, bn, sn, unparsed = _pull_conv_args(norm=norm, **kwargs) net = [EasyConv2d(channel, f, ks, activation=act, use_bn=bn, use_sn=sn, - use_bias=bias)] + use_bias=bias, negative_slope=0.2)] self.n_strided = 0 counter = 1 assert favor in ('A', 'B', 'C'), "favor must be A | B | C" while True: f *= 2 net.append(EasyConv2d( - f // 2, f, ks + 1, 2, activation=act, use_bias=bias, use_bn=bn, - use_sn=sn)) + f // 2, f, ks + 1, 2, activation=act, use_bias=bias, use_bn=bn, + use_sn=sn, **unparsed)) self.n_strided += 1 counter += 1 if counter >= num_layers: break if favor in ('A', 'C'): net.append(EasyConv2d( - f, f, ks, 1, activation=act, use_bias=bias, use_bn=bn, - use_sn=sn)) + f, f, ks, 1, activation=act, use_bias=bias, use_bn=bn, + use_sn=sn, **unparsed)) counter += 1 if counter >= num_layers: break if favor == 'C': self.body = nn.Sequential(*net, nn.AdaptiveAvgPool2d(1)) linear = [nn.Linear(f, 100, bias), - Activation(act, in_place=True), + Activation(act, in_place=True, **unparsed), nn.Linear(100, 1, bias)] else: self.body = nn.Sequential(*net) - linear = [nn.Linear(f * 4 * 4, 100, bias), - Activation(act, in_place=True), + linear = [nn.Linear(f * scale * scale, 100, bias), + Activation(act, in_place=True, **unparsed), nn.Linear(100, 1, bias)] if sn: linear[0] = nn.utils.spectral_norm(linear[0]) @@ -103,16 +109,16 @@ class Residual(nn.Module): def __init__(self, channel, num_residual, norm=None, favor='A', **kwargs): super(Residual, self).__init__() - f, ks, act, bias, bn, sn = _pull_conv_args(norm=norm, **kwargs) + f, ks, act, bias, bn, sn, unparsed = _pull_conv_args(norm=norm, **kwargs) net = [EasyConv2d(channel, f, ks, activation=act, use_bn=bn, use_sn=sn, - use_bias=bias)] + use_bias=bias, **unparsed)] for i in range(num_residual): net.append(RB(f, ks, act, bias, bn, sn, favor == 'A')) net.append(nn.AvgPool2d(2)) - net.append(Activation(act, in_place=True)) + net.append(Activation(act, in_place=True, **unparsed)) self.body = nn.Sequential(*net) linear = [nn.Linear(f * 4 * 4, 100, bias), - Activation(act, in_place=True), + Activation(act, in_place=True, **unparsed), nn.Linear(100, 1, bias)] if sn: linear[0] = nn.utils.spectral_norm(linear[0]) @@ -124,3 +130,36 @@ def forward(self, x): assert x.size(2) == x.size(3) == 4 * 2 ** self.n_strided y = self.body(x).flatten(1) return self.linear(y) + + +class PatchGAN(nn.Module): + """Defines a PatchGAN discriminator + Args: + channel: the number of channels in input images + num_layers: number of total cnn layers + norm: could be "None", "SN/Spectral" or "BN/Batch" + """ + + def __init__(self, channel, num_layers=3, norm=None, **kwargs): + super(PatchGAN, self).__init__() + f, ks, act, bias, bn, sn, unparsed = _pull_conv_args(norm=norm, **kwargs) + sequence = [ + EasyConv2d(channel, f, ks + 1, 2, activation=act, use_bn=bn, use_sn=sn, + use_bias=bias, **unparsed)] + in_c = f + out_c = f * 2 + for n in range(1, num_layers): + sequence.append( + EasyConv2d(in_c, out_c, ks + 1, 2, activation=act, use_bn=bn, + use_sn=sn, use_bias=bias, **unparsed)) + in_c = out_c + out_c *= 2 + sequence += [ + EasyConv2d(in_c, out_c, ks, activation=act, use_bn=bn, use_sn=sn, + use_bias=bias, **unparsed), + EasyConv2d(out_c, 1, 1) + ] + self.body = nn.Sequential(*sequence) + + def forward(self, x): + 
return self.body(x) diff --git a/VSR/Backend/Torch/Models/Optim/SISR.py b/VSR/Backend/Torch/Models/Optim/SISR.py index 527dabf..ea10f6f 100644 --- a/VSR/Backend/Torch/Models/Optim/SISR.py +++ b/VSR/Backend/Torch/Models/Optim/SISR.py @@ -3,17 +3,16 @@ # Email: wenyitang@outlook.com # Update: 2020 - 6 - 16 -import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from ..Model import SuperResolution -from ..Ops.Discriminator import DCGAN -from ..Ops.Loss import VggFeatureLoss, GeneratorLoss, DiscriminatorLoss +from ..Ops.Discriminator import PatchGAN +from ..Ops.Loss import DiscriminatorLoss, GeneratorLoss, VggFeatureLoss from ...Framework.Summary import get_writer from ...Util import Metrics -from ...Util.Utility import pad_if_divide, upsample +from ...Util.Utility import pad_if_divide def get_opt(opt_config, params, lr): @@ -150,7 +149,15 @@ def __init__(self, scale, channel, image_weight=1, feature_weight=0, self.feature = [VggFeatureLoss(feature_lists, True)] if self.use_gan: # define D-net - self.dnet = DCGAN(3, 8, norm='BN', favor='C') + dnet = kwargs.get('discriminator', PatchGAN) + dnet_kw = kwargs.get('discriminator_kwargs', { + 'channel': channel, + 'num_layers': 3, + 'norm': 'BN', + 'activation': 'leaky', + 'negative_slope': 0.2, + }) + self.dnet = dnet(**dnet_kw) self.optd = torch.optim.Adam(self.trainable_variables('dnet'), 1e-4) # image, vgg, gan self.w = [image_weight, feature_weight, gan_weight] diff --git a/VSR/Backend/Torch/Models/Qprn.py b/VSR/Backend/Torch/Models/Qprn.py index 8a311d6..1f38609 100644 --- a/VSR/Backend/Torch/Models/Qprn.py +++ b/VSR/Backend/Torch/Models/Qprn.py @@ -111,7 +111,7 @@ def __init__(self, gain, scale, channel, **kwargs): self.qprn = Composer(channel, L=2, gain=gain) self.adam = torch.optim.Adam(self.trainable_variables('qprn'), 1e-4) if self.debug.gan: - self.dnet = DCGAN(channel * 4, 9, 'bn', 'A') + self.dnet = DCGAN(channel * 4, 9, scale, 'bn', 'A') self.adam_d = torch.optim.Adam(self.trainable_variables('dnet'), 1e-4) self._trainer = _Trainer diff --git a/VSR/Backend/Torch/Models/Rcan.py b/VSR/Backend/Torch/Models/Rcan.py index 7515d17..0dfe77c 100644 --- a/VSR/Backend/Torch/Models/Rcan.py +++ b/VSR/Backend/Torch/Models/Rcan.py @@ -10,6 +10,7 @@ import torch.nn.functional as F from .Model import SuperResolution +from .Optim.SISR import L1Optimizer from .Ops.Blocks import EasyConv2d, MeanShift, Rcab from .Ops.Scale import Upsample from ..Util import Metrics @@ -64,30 +65,13 @@ def forward(self, x): return x -class RCAN(SuperResolution): +class RCAN(L1Optimizer): def __init__(self, channel, scale, n_resgroups, n_resblocks, n_feats, - reduction, **kwargs): - super(RCAN, self).__init__(scale, channel) - self.rgb_range = kwargs.get('rgb_range', 255) + reduction, rgb_range=255, **kwargs): + self.rgb_range = rgb_range self.rcan = Rcan(channel, scale, n_resgroups, n_resblocks, n_feats, - reduction, self.rgb_range) - self.opt = torch.optim.Adam(self.trainable_variables(), 1e-4) + reduction, rgb_range) + super(RCAN, self).__init__(scale, channel, **kwargs) - def train(self, inputs, labels, learning_rate=None): - sr = self.rcan(inputs[0] * self.rgb_range) / self.rgb_range - loss = F.l1_loss(sr, labels[0]) - if learning_rate: - for param_group in self.opt.param_groups: - param_group["lr"] = learning_rate - self.opt.zero_grad() - loss.backward() - self.opt.step() - return {'l1': loss.detach().cpu().numpy()} - - def eval(self, inputs, labels=None, **kwargs): - metrics = {} - sr = self.rcan(inputs[0] * self.rgb_range) / 
self.rgb_range - sr = sr.cpu().detach() - if labels is not None: - metrics['psnr'] = Metrics.psnr(sr.numpy(), labels[0].cpu().numpy()) - return [sr.numpy()], metrics + def fn(self, x): + return self.rcan(x * self.rgb_range) / self.rgb_range diff --git a/VSR/Backend/Torch/Models/SRFeat.py b/VSR/Backend/Torch/Models/SRFeat.py index 5a1bf11..49e205a 100644 --- a/VSR/Backend/Torch/Models/SRFeat.py +++ b/VSR/Backend/Torch/Models/SRFeat.py @@ -64,11 +64,11 @@ def __init__(self, channel, scale, patch_size=64, weights=(1, 0.01, 0.01), self.gopt = torch.optim.Adam(self.trainable_variables('srfeat'), 1e-4) if self.use_gan: # vanilla image - self.dnet1 = DCGAN(channel, np.log2(patch_size // 4) * 2, 'bn') + self.dnet1 = DCGAN(channel, np.log2(patch_size // 4) * 2, scale, 'bn') self.dopt1 = torch.optim.Adam(self.trainable_variables('dnet1'), 1e-4) if self.use_feat_gan: # vgg feature - self.dnet2 = DCGAN(256, np.log2(patch_size // 16) * 2, 'bn') + self.dnet2 = DCGAN(256, np.log2(patch_size // 16) * 2, scale, 'bn') self.dopt2 = torch.optim.Adam(self.trainable_variables('dnet2'), 1e-4) self.vgg = [VggFeatureLoss(['block3_conv1'], True)] self.w = weights diff --git a/VSR/Backend/Torch/Models/Srmd.py b/VSR/Backend/Torch/Models/Srmd.py index c3fa77e..dccc88a 100644 --- a/VSR/Backend/Torch/Models/Srmd.py +++ b/VSR/Backend/Torch/Models/Srmd.py @@ -12,6 +12,7 @@ from VSR.Util.Math import anisotropic_gaussian_kernel, gaussian_kernel from VSR.Util.PcaPrecompute import get_degradation from .Ops.Blocks import EasyConv2d +from .Ops.Discriminator import DCGAN from .Optim.SISR import PerceptualOptimizer from ..Util.Utility import imfilter @@ -65,7 +66,11 @@ def __init__(self, scale, channel, degradation=None, layers=12, filters=128, self.noise = noise self.blur_padding = torch.nn.ReflectionPad2d(7) self.srmd = Net(scale, channel, layers, filters, pca_length) - super(SRMD, self).__init__(scale, channel, **kwargs) + disc_opt = { + 'channel': channel, 'num_layers': 10, 'scale': scale, 'norm': 'BN' + } + super(SRMD, self).__init__(scale, channel, discriminator=DCGAN, + discriminator_kwargs=disc_opt, **kwargs) def gen_kernel(self, ktype, ksize, l1, l2=None, theta=0): if ktype == 'isotropic': diff --git a/VSR/Backend/Torch/Models/__init__.py b/VSR/Backend/Torch/Models/__init__.py index fe23104..e11379e 100644 --- a/VSR/Backend/Torch/Models/__init__.py +++ b/VSR/Backend/Torch/Models/__init__.py @@ -8,6 +8,7 @@ __all__ = ['get_model', 'list_supported_models'] models = { + 'cubic': ('Bicubic', 'BICUBIC'), # alias: (file, class) 'espcn': ('Classic', 'ESPCN'), 'srcnn': ('Classic', 'SRCNN'), @@ -44,6 +45,9 @@ 'edrn': ('NTIRE19', 'EDRN'), 'frn': ('NTIRE19', 'FRN'), 'ran': ('NTIRE19', 'RAN'), + # NTIRE 2020 + 'realsr': ('NTIRE20', 'RealSR'), + 'esr': ('EfficientSR', 'ESR') } diff --git a/VSR/DataLoader/Loader.py b/VSR/DataLoader/Loader.py index b496e5d..79ee16b 100644 --- a/VSR/DataLoader/Loader.py +++ b/VSR/DataLoader/Loader.py @@ -17,7 +17,7 @@ from ..Util import Utility from ..Util.ImageProcess import img_to_array -FREE_MEMORY = virtual_memory().available * 0.5 +FREE_MEMORY = virtual_memory().available LOG = logging.getLogger('VSR.Loader') diff --git a/prepare_data.py b/prepare_data.py index 25251e8..6129067 100644 --- a/prepare_data.py +++ b/prepare_data.py @@ -42,48 +42,52 @@ 'SET14.zip': 'https://uofi.box.com/shared/static/igsnfieh4lz68l926l8xbklwsnnk8we9.zip', 'SunHay80.zip': 'https://uofi.box.com/shared/static/rirohj4773jl7ef752r330rtqw23djt8.zip', 'Urban100.zip': 
'https://uofi.box.com/shared/static/65upg43jjd0a4cwsiqgl6o6ixube6klm.zip', - # 'VID4.zip': 'https://people.csail.mit.edu/celiu/CVPR2011/videoSR.zip', 'BSD300.tgz': 'https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/segbench/BSDS300-images.tgz', 'BSD500.tgz': 'http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/BSR/BSR_bsds500.tgz', '91image.rar': 'http://www.ifp.illinois.edu/~jyang29/codes/ScSR.rar', 'waterloo.rar': 'http://ivc.uwaterloo.ca/database/WaterlooExploration/exploration_database_and_code.rar', + # Google Drive File ID. + # If you can't download from this file, visit url https://drive.google.com/open?id= + # paste the file id into position . 'GOPRO_Large.zip': '1H0PIXvJH4c40pk7ou6nAwoxuR4Qh_Sa2', 'MCL-V.rar': '1z41hdqR-bqNLcUWllPePzkfQW-I_A9ny', 'vid4.zip': '1ogEdifL_krqJnFAHfGNqOSMuUg_Ud6fb', } WEIGHTS = { - 'srcnn.tar.gz': 'https://github.com/LoSealL/Model/releases/download/srcnn/srcnn.tar.gz', - 'edsr.zip': 'https://github.com/LoSealL/Model/releases/download/edsr/edsr.zip', - 'dncnn.zip': 'https://github.com/LoSealL/Model/releases/download/DnCNN/dncnn.zip', - 'carn.zip': 'https://github.com/LoSealL/Model/releases/download/carn/carn.zip', - # Google Drive File ID. - # If you can't download from this file, visit url https://drive.google.com/open?id= - # paste the file id into position . - 'srdensenet.zip': '1aXAfRqZieY6mTfZUnErG84-9NfkQSeDw', - 'vdsr.zip': '1hW5YDxXpmjO2IfAy8f29O7yf1M3fPIg1', - 'msrn.zip': '1A0LoY3oB_VnArP3GzI1ILUNJbLAEjdtJ', - 'vespcn.zip': '19u4YpsyThxW5dv4fhpMj7c5gZeEDKthm', - 'dbpn.zip': '1ymtlOjhkGmad-od0zw7yTf17nWD4KMVi', - 'idn.zip': '1Fh3rtvrKKLAK27r518T1M_JET_LWZAFQ', - 'drsr_v2.zip': '1UrVNE6QMcQTW9Ks4P__JrRClb4IGTMYp', - 'drsr_sc2.zip': '1xIRVG7jbTM9fcLQkwyGyJIjwF2rTbNEJ', - 'drsr_sc4.zip': '1W-222rR2D2o-E99B4cXuUPBz2aCLuY_Z', - # PyTorch weights (Prefix "T") - 'Tsrcnn.zip': 'https://github.com/LoSealL/Model/releases/download/srcnn/Tsrcnn.zip', - 'Tespcn.zip': 'https://github.com/LoSealL/Model/releases/download/espcn/Tespcn.zip', - 'Tvdsr.zip': 'https://github.com/LoSealL/Model/releases/download/vdsr/Tvdsr.zip', - 'Tdrcn.zip': 'https://github.com/LoSealL/Model/releases/download/drcn/Tdrcn.zip', - 'Tdrrn.zip': 'https://github.com/LoSealL/Model/releases/download/drrn/Tdrrn.zip', - 'Tsofvsr.zip': 'https://github.com/LoSealL/Model/releases/download/sofvsr/SOFVSR_x4.zip', - 'Tcarn.zip': 'https://github.com/LoSealL/Model/releases/download/carn/tcarn.zip', - 'Tesrgan.zip': 'https://github.com/LoSealL/Model/releases/download/esrgan/esrgan.zip', - 'Tfrvsr.zip': 'https://github.com/LoSealL/Model/releases/download/frvsr/FRVSR.zip', - 'Tmldn.zip': 'https://github.com/LoSealL/Model/releases/download/mldn/drn.zip', - 'Tcrdn.zip': 'https://github.com/LoSealL/Model/releases/download/crdn/rsr.zip', - 'Trbpn.zip': '1Ozp5j-DBWJSpXY5GvxiEPKdfCaAbOXqu', - 'Tspmc.zip': 'https://github.com/LoSealL/Model/releases/download/spmc/spmc.zip', - 'Tsrmd.zip': '1ORKH05-aLSbQaWB4qQulIm2INoRufuD_', - 'Tdbpn.zip': '1PbhtuMz1zF3-d16dthurJ0xIQ9uyMvkz' + 'tensorflow': { + 'srcnn.tar.gz': 'https://github.com/LoSealL/Model/releases/download/srcnn/srcnn.tar.gz', + 'edsr.zip': 'https://github.com/LoSealL/Model/releases/download/edsr/edsr.zip', + 'dncnn.zip': 'https://github.com/LoSealL/Model/releases/download/DnCNN/dncnn.zip', + 'carn.zip': 'https://github.com/LoSealL/Model/releases/download/carn/carn.zip', + 'srdensenet.zip': '1aXAfRqZieY6mTfZUnErG84-9NfkQSeDw', + 'vdsr.zip': '1hW5YDxXpmjO2IfAy8f29O7yf1M3fPIg1', + 'msrn.zip': 
'1A0LoY3oB_VnArP3GzI1ILUNJbLAEjdtJ', + 'vespcn.zip': '19u4YpsyThxW5dv4fhpMj7c5gZeEDKthm', + 'dbpn.zip': '1ymtlOjhkGmad-od0zw7yTf17nWD4KMVi', + 'idn.zip': '1Fh3rtvrKKLAK27r518T1M_JET_LWZAFQ', + 'drsr_v2.zip': '1UrVNE6QMcQTW9Ks4P__JrRClb4IGTMYp', + 'drsr_sc2.zip': '1xIRVG7jbTM9fcLQkwyGyJIjwF2rTbNEJ', + 'drsr_sc4.zip': '1W-222rR2D2o-E99B4cXuUPBz2aCLuY_Z', + }, + 'pytorch': { + 'srcnn.zip': 'https://github.com/LoSealL/Model/releases/download/srcnn/Tsrcnn.zip', + 'espcn.zip': 'https://github.com/LoSealL/Model/releases/download/espcn/Tespcn.zip', + 'vdsr.zip': 'https://github.com/LoSealL/Model/releases/download/vdsr/Tvdsr.zip', + 'drcn.zip': 'https://github.com/LoSealL/Model/releases/download/drcn/Tdrcn.zip', + 'drrn.zip': 'https://github.com/LoSealL/Model/releases/download/drrn/Tdrrn.zip', + 'sofvsr.zip': 'https://github.com/LoSealL/Model/releases/download/sofvsr/SOFVSR_x4.zip', + 'carn.zip': 'https://github.com/LoSealL/Model/releases/download/carn/tcarn.zip', + 'edsr.pt': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_baseline_x4-6b446fab.pt', + 'esrgan.zip': 'https://github.com/LoSealL/Model/releases/download/esrgan/esrgan.zip', + 'frvsr.zip': 'https://github.com/LoSealL/Model/releases/download/frvsr/FRVSR.zip', + 'mldn.zip': 'https://github.com/LoSealL/Model/releases/download/mldn/drn.zip', + 'crdn.zip': 'https://github.com/LoSealL/Model/releases/download/crdn/rsr.zip', + 'spmc.zip': 'https://github.com/LoSealL/Model/releases/download/spmc/spmc.zip', + 'rcan.zip': '10bEK-NxVtOS9-XSeyOZyaRmxUTX3iIRa', + 'rbpn.zip': '1Ozp5j-DBWJSpXY5GvxiEPKdfCaAbOXqu', + 'srmd.zip': '1ORKH05-aLSbQaWB4qQulIm2INoRufuD_', + 'dbpn.zip': '1PbhtuMz1zF3-d16dthurJ0xIQ9uyMvkz' + } } @@ -106,12 +110,10 @@ def matches(str1, pattern): def user_input(name, defaults=False, pattern=None): - _name = name - for _pat in pattern: - _name = matches(name, _pat) - if _name is not None: - break - if not _name: + if pattern.find('.*') < 0 and pattern.find('*') >= 0: + pattern = pattern.replace('*', '.*') + _name = matches(name, pattern) + if _name is None: return question = 'Do you wish to download {}? '.format(_name) if defaults: @@ -154,6 +156,7 @@ def drive_download(name, url, path): def main(): parser = argparse.ArgumentParser() + parser.add_argument("filter", help="an re pattern to filter candidates.") parser.add_argument("--download_dir", type=str, default=_DEFAULT_DOWNLOAD_DIR, help="Specify download directory. " @@ -166,8 +169,6 @@ def main(): default=_DEFAULT_WEIGHTS_DIR, help="Specify weights extracted directory. 
" "[{}]".format(_DEFAULT_WEIGHTS_DIR)) - parser.add_argument("--filter", nargs='*', default=[], - help="an re pattern to filter candidates.") parser.add_argument("-q", "--quiet", action="store_true", help="download quietly") args, _ = parser.parse_known_args() @@ -190,7 +191,8 @@ def get_leaf(key: str, node: dict): need_to_download[k] = v except (FileNotFoundError, OSError): pass - for k, v in get_leaf(args.weights_dir, WEIGHTS): + from VSR.Backend import BACKEND + for k, v in get_leaf(args.weights_dir, WEIGHTS[BACKEND]): if user_input(k.stem, args.quiet, args.filter): need_to_download[k] = v need_to_extract = {} @@ -212,7 +214,22 @@ def get_leaf(key: str, node: dict): open_fn = zipfile.ZipFile is_match_fn = zipfile.is_zipfile else: - raise TypeError("Unrecognized extension: {}".format(ext)) + class copy: + def __init__(self, src): + self.src = src + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + return + + def extractall(self, dst): + import shutil + shutil.copy(self.src, dst) + + is_match_fn = lambda x: True + open_fn = copy if is_match_fn(v): with open_fn(v) as fd: try: From 7f4eff3bc07d2f36dd2afe2f88125963acfe1af2 Mon Sep 17 00:00:00 2001 From: Wenyi Tang Date: Wed, 24 Jun 2020 13:55:56 +0800 Subject: [PATCH 07/12] Add `--caching_dataset` to cache transformed data into memory (ignored when `memory_limit` set) --- CHANGELOG.md | 1 + Train/train.py | 2 ++ VSR/Backend/Keras/Framework/Trainer.py | 7 ++++-- VSR/Backend/TF/Framework/Trainer.py | 7 ++++-- VSR/Backend/Torch/Framework/Trainer.py | 7 ++++-- VSR/DataLoader/Crop.py | 3 ++- VSR/DataLoader/Loader.py | 34 ++++++++++++++++++-------- 7 files changed, 44 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6dd14b..bb79e84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Update TF backend - Add support to tensorflow 2.0 (both legacy and eager mode) - Refactor torch backend models +- Add `--caching_dataset` to cache transformed data into memory (ignored when `memory_limit` set). 
## 1.0.5 ## 2020-05 diff --git a/Train/train.py b/Train/train.py index 4063d06..d6b166e 100644 --- a/Train/train.py +++ b/Train/train.py @@ -34,6 +34,7 @@ g3.add_argument("--export", help="export ONNX (torch backend) or protobuf (tf backend) (needs support from model)") g3.add_argument("-c", "--comment", default=None, help="extend a comment string after saving folder") g3.add_argument("--distributed", action="store_true") +g3.add_argument("--caching_dataset", action="store_true") def main(): @@ -94,6 +95,7 @@ def main(): config = t.query_config(opt) if opt.lr_decay: config.lr_schedule = lr_decay(lr=opt.lr, **opt.lr_decay) + config.caching = opt.caching_dataset and opt.memory_limit is None t.fit([lt, lv], config) if opt.export: t.export(opt.export) diff --git a/VSR/Backend/Keras/Framework/Trainer.py b/VSR/Backend/Keras/Framework/Trainer.py index 0e65e05..d3ee00e 100644 --- a/VSR/Backend/Keras/Framework/Trainer.py +++ b/VSR/Backend/Keras/Framework/Trainer.py @@ -42,6 +42,7 @@ def query_config(self, config, **kwargs): self.v.traced_val = config.traced_val self.v.ensemble = config.ensemble self.v.cuda = config.cuda + self.v.caching = config.caching_dataset return self.v def fit_init(self) -> bool: @@ -75,7 +76,8 @@ def fit(self, loaders, config, **kwargs): train_iter = v.train_loader.make_one_shot_iterator(v.batch_shape, v.steps, shuffle=True, - memory_limit=mem) + memory_limit=mem, + caching=v.caching) v.train_loader.prefetch(shuffle=True, memory_usage=mem) v.avg_meas = {} if v.lr_schedule and callable(v.lr_schedule): @@ -121,7 +123,8 @@ def benchmark(self, loader, config, **kwargs): v.loader = loader it = v.loader.make_one_shot_iterator(v.batch_shape, v.val_steps, shuffle=not v.traced_val, - memory_limit=v.memory_limit) + memory_limit=v.memory_limit, + caching=v.caching) self.model.to_eval() for items in tqdm.tqdm(it, 'Test', ascii=True): self.fn_benchmark_each_step(items) diff --git a/VSR/Backend/TF/Framework/Trainer.py b/VSR/Backend/TF/Framework/Trainer.py index 19d126b..a9f7aa7 100644 --- a/VSR/Backend/TF/Framework/Trainer.py +++ b/VSR/Backend/TF/Framework/Trainer.py @@ -233,6 +233,7 @@ def query_config(self, config, **kwargs) -> Config: self.v.traced_val = config.traced_val self.v.ensemble = config.ensemble self.v.cuda = config.cuda + self.v.caching = config.caching_dataset return self.v def fit_init(self) -> bool: @@ -260,7 +261,8 @@ def fn_train_each_epoch(self): train_iter = v.train_loader.make_one_shot_iterator(v.batch_shape, v.steps, shuffle=True, - memory_limit=mem) + memory_limit=mem, + caching=v.caching) v.train_loader.prefetch(v.memory_limit) v.avg_meas = {} if v.lr_schedule and callable(v.lr_schedule): @@ -325,7 +327,8 @@ def fn_benchmark_body(self): v = self.v it = v.loader.make_one_shot_iterator(v.batch_shape, v.val_steps, shuffle=not v.traced_val, - memory_limit=v.memory_limit) + memory_limit=v.memory_limit, + caching=v.caching) for items in tqdm.tqdm(it, 'Test', ascii=True): self.fn_benchmark_each_step(items) diff --git a/VSR/Backend/Torch/Framework/Trainer.py b/VSR/Backend/Torch/Framework/Trainer.py index 55e92d8..14b8286 100644 --- a/VSR/Backend/Torch/Framework/Trainer.py +++ b/VSR/Backend/Torch/Framework/Trainer.py @@ -47,6 +47,7 @@ def query_config(self, config, **kwargs): self.v.ensemble = config.ensemble self.v.cuda = config.cuda self.v.map_location = 'cuda:0' if config.cuda and torch.cuda.is_available() else 'cpu' + self.v.caching = config.caching return self.v def fit_init(self) -> bool: @@ -78,7 +79,8 @@ def fit(self, loaders, config, **kwargs): train_iter = 
v.train_loader.make_one_shot_iterator(v.batch_shape, v.steps, shuffle=True, - memory_limit=mem) + memory_limit=mem, + caching=v.caching) v.train_loader.prefetch(shuffle=True, memory_usage=mem) v.avg_meas = {} if v.lr_schedule and callable(v.lr_schedule): @@ -125,7 +127,8 @@ def benchmark(self, loader, config, **kwargs): v.loader = loader it = v.loader.make_one_shot_iterator(v.batch_shape, v.val_steps, shuffle=not v.traced_val, - memory_limit=v.memory_limit) + memory_limit=v.memory_limit, + caching=v.caching) self.model.to_eval() for items in tqdm.tqdm(it, 'Test', ascii=True): with torch.no_grad(): diff --git a/VSR/DataLoader/Crop.py b/VSR/DataLoader/Crop.py index 8be9716..26f3f3a 100644 --- a/VSR/DataLoader/Crop.py +++ b/VSR/DataLoader/Crop.py @@ -28,7 +28,8 @@ class RandomCrop(Cropper): def call(self, img: tuple, shape: (list, tuple)) -> tuple: hr, lr = img if lr.shape[-2] < shape[-2]: - raise ValueError("Batch shape is too large than data") + raise ValueError( + f"Batch shape is larger than data: {lr.shape} vs {shape}") ind = [np.random.randint(nd + 1) for nd in lr.shape - np.array(shape)] slc1 = [slice(n, n + s) for n, s in zip(ind, shape)] slc2 = slc1.copy() diff --git a/VSR/DataLoader/Loader.py b/VSR/DataLoader/Loader.py index 79ee16b..7d793c7 100644 --- a/VSR/DataLoader/Loader.py +++ b/VSR/DataLoader/Loader.py @@ -50,6 +50,7 @@ class EpochIterator: shape: The shape of the generated batch, 5-D requested [N, T, C, H, W]. steps: The number of batches to generate in one epoch. shuffle: A boolean representing whether to shuffle the dataset. + caching: Cache the transform and color converted image. Note: The rules for -1 shape: @@ -64,11 +65,12 @@ class EpochIterator: - If the `steps` is -1, will generate batches in sequential order; """ - def __init__(self, loader, shape, steps, shuffle=None): + def __init__(self, loader, shape, steps, shuffle=None, caching=False): self.loader = loader self.shape = shape self.depth = shape[1] self.count = 0 + self.cache = caching t = len(self.loader.data['hr']) frame_nums = [len(i) for i in self.loader.data['hr']] temporal_padding = not shuffle @@ -78,7 +80,7 @@ def __init__(self, loader, shape, steps, shuffle=None): idx_ = [(i, np.array([j + x for x in range(depth)])) for j in range(-(depth // 2), frame_nums[i] - (depth // 2))] d2_ = depth // 2 - self.index += idx_ if temporal_padding or d2_ == 0 else idx_[d2_ : -d2_] + self.index += idx_ if temporal_padding or d2_ == 0 else idx_[d2_: -d2_] self.steps = steps if steps >= 0 else len(self.index) // shape[0] while len(self.index) < self.steps * shape[0] and self.index: self.index += self.index @@ -110,13 +112,23 @@ def __next__(self): d[d >= len(hr)] = len(hr) - 1 name = self.loader.data['names'][i] hr2 = np.asarray(hr, dtype=object)[d] - for fn in cb_hr[0]: - hr2 = [fn(img) for img in hr2] - hr2 = [img.convert(self.loader.hr['color']) for img in hr2] + if not self.loader.cache_map.get(f'hr-{name}-{i}-{d}'): + for fn in cb_hr[0]: + hr2 = [fn(img) for img in hr2] + hr2 = [img.convert(self.loader.hr['color']) for img in hr2] + if self.cache: + self.loader.data['hr'][i] = hr2 + self.loader.cache_map[f'hr-{name}-{i}-{d}'] = True + LOG.debug(f"Caching hr-{name}-{i}-{d}...") lr2 = np.asarray(lr, dtype=object)[d] - for fn in cb_lr[0]: - lr2 = [fn(img) for img in lr2] - lr2 = [img.convert(self.loader.lr['color']) for img in lr2] + if not self.loader.cache_map.get(f'lr-{name}-{i}-{d}'): + for fn in cb_lr[0]: + lr2 = [fn(img) for img in lr2] + lr2 = [img.convert(self.loader.lr['color']) for img in lr2] + if 
self.cache:
+          self.loader.data['lr'][i] = lr2
+          self.loader.cache_map[f'lr-{name}-{i}-{d}'] = True
+          LOG.debug(f"Caching lr-{name}-{i}-{d}...")
       hr3 = np.stack([img_to_array(img, DATA_FORMAT) for img in hr2])
       lr3 = np.stack([img_to_array(img, DATA_FORMAT) for img in lr2])
       del hr2, lr2
@@ -227,6 +239,7 @@ def __init__(self, hr_data, lr_data=None, scale=None, extra_data: dict = None,
       'names': [],
       'extra': []
     }
+    self.cache_map = {}
     self.extra = extra_data or {}
     self.crop = None
     self.threads = threads
@@ -292,7 +305,7 @@ def set_color_space(self, target: str, mode: str):
     getattr(self, target.lower()).update(color=mode)
 
   def make_one_shot_iterator(self, batch_shape, steps, shuffle=None,
-                             memory_limit=None):
+                             memory_limit=None, caching=False):
     """Make an iterator object to generate batch data for models.
 
     Args:
@@ -300,6 +313,7 @@ def make_one_shot_iterator(self, batch_shape, steps, shuffle=None,
       steps: The number of batches to generate in one epoch.
       shuffle: A boolean representing whether to shuffle the dataset.
      memory_limit: the maximum system memory to use. (Not GPU memory!!)
+      caching: cache the transformed images (transform and color conversion)
 
     Note:
       The rules for -1 shape:
@@ -339,7 +353,7 @@ def make_one_shot_iterator(self, batch_shape, steps, shuffle=None,
       if loaded >= self.aux['cap'] / memory_limit:
         loaded = 0
         self.loaded = loaded << (self.threads * 2)
-    return EpochIterator(self, shape, steps, shuffle)
+    return EpochIterator(self, shape, steps, shuffle, caching)
 
   def prefetch(self, shuffle=None, memory_usage=None):
     # check memory usage

From 7e1acaea12a6d135e24e1e952a44fc2c9760f2d7 Mon Sep 17 00:00:00 2001
From: Wenyi Tang
Date: Wed, 24 Jun 2020 14:18:01 +0800
Subject: [PATCH 08/12] copy config file into working directory

---
 Train/eval.py  | 1 +
 Train/train.py | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Train/eval.py b/Train/eval.py
index 8904d6b..07955cb 100644
--- a/Train/eval.py
+++ b/Train/eval.py
@@ -70,6 +70,7 @@ def main():
     model_config_file = Path(f'{CWD}/Train/par/{BACKEND}/{opt.model}.{_ext}')
     if model_config_file.exists():
       opt.update(compat_param(Config(str(model_config_file))))
+      break
   # get model parameters from pre-defined YAML file
   model_params = opt.get(opt.model, {})
   suppress_opt_by_args(model_params, *args)
diff --git a/Train/train.py b/Train/train.py
index d6b166e..60027e8 100644
--- a/Train/train.py
+++ b/Train/train.py
@@ -4,13 +4,14 @@
 # Update: 2020 - 2 - 7
 
 import argparse
+import shutil
 from pathlib import Path
 
 from VSR.Backend import BACKEND
 from VSR.DataLoader import CenterCrop, Loader, RandomCrop
 from VSR.DataLoader import load_datasets
 from VSR.Model import get_model, list_supported_models
-from VSR.Util import Config, lr_decay, suppress_opt_by_args, compat_param
+from VSR.Util import Config, compat_param, lr_decay, suppress_opt_by_args
 
 CWD = Path(__file__).resolve().parent.parent
 parser = argparse.ArgumentParser(description=f'VSR ({BACKEND}) Training Tool v1.0')
@@ -54,6 +55,7 @@ def main():
     model_config_file = Path(f'{CWD}/Train/par/{BACKEND}/{opt.model}.{_ext}')
     if model_config_file.exists():
       opt.update(compat_param(Config(str(model_config_file))))
+      break
   # get model parameters from pre-defined YAML file
   model_params = opt.get(opt.model, {})
   suppress_opt_by_args(model_params, *args)
@@ -92,6 +94,8 @@ def main():
     lv.set_color_space('lr', 'L')
   # enter model executor environment
   with model.get_executor(root) as t:
+    if hasattr(t, '_logd') and isinstance(t._logd, Path):
+      shutil.copy(model_config_file, t._logd)
     config = 
t.query_config(opt)
     if opt.lr_decay:
       config.lr_schedule = lr_decay(lr=opt.lr, **opt.lr_decay)

From 854d5bf1b01f95e715fea0b8c5ad6bee63756043 Mon Sep 17 00:00:00 2001
From: Wenyi Tang
Date: Tue, 28 Jul 2020 13:39:36 +0800
Subject: [PATCH 09/12] Fix FastMetrics multi-threads issue

---
 Tools/FastMetrics.py | 55 +++++++++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/Tools/FastMetrics.py b/Tools/FastMetrics.py
index 8a94beb..7d48599 100644
--- a/Tools/FastMetrics.py
+++ b/Tools/FastMetrics.py
@@ -4,13 +4,13 @@
 # Update Date: 6/6/19, 10:35 AM
 
 import argparse
-import multiprocessing as mp
+from multiprocessing.pool import ThreadPool
 from pathlib import Path
 
 import numpy as np
 import tqdm
 from PIL import Image
-from skimage.measure import compare_ssim
+from skimage.metrics import structural_similarity
 
 from VSR.Util.ImageProcess import rgb_to_yuv
 
@@ -24,22 +24,36 @@
 FLAGS = parser.parse_args()
 
 
+def split_path_filter(x: Path):
+  try:
+    # strict resolution raises OSError when the path does not exist on
+    # disk, e.g. when it embeds a glob pattern
+    x = x.resolve(strict=True)
+    # path, glob pattern, recursive
+    return x, '*', False
+  except OSError:
+    pattern = x.name
+    rec = False
+    x = x.parent
+    if '*' in x.name:
+      x = x.parent
+      rec = True
+    return x, pattern, rec
+
+
 def gen():
   d1 = Path(FLAGS.input_dir)
   d2 = Path(FLAGS.reference_dir)
+  d1, d1_filter, d1_rec = split_path_filter(d1)
+  d2, d2_filter, d2_rec = split_path_filter(d2)
   assert d1.exists() and d2.exists(), "Path not found!"
-  assert len(list(d1.iterdir())) == len(list(d2.iterdir())), f"{d1} v {d2}"
-
-  for x, y in zip(sorted(d1.iterdir()), sorted(d2.iterdir())):
-    if x.is_dir() and y.is_dir():
-      assert len(list(x.iterdir())) == len(list(y.iterdir())), f"{x} v {y}"
-      for i, j in zip(sorted(x.iterdir()), sorted(y.iterdir())):
-        if i.is_file() and j.is_file():
-          yield i, j
-        else:
-          print(f" [!] Found {i} v.s. {j} not file.")
-    elif x.is_file() and y.is_file():
+  d1 = sorted(d1.rglob(d1_filter)) if d1_rec else sorted(d1.glob(d1_filter))
+  d2 = sorted(d2.rglob(d2_filter)) if d2_rec else sorted(d2.glob(d2_filter))
+  assert len(d1) == len(d2), f"{len(d1)} v {len(d2)}"
+
+  for x, y in zip(d1, d2):
+    if x.is_file() and y.is_file():
       yield x, y
    else:
       print(f" [!] Found {x} v.s. {y} mismatch.")
@@ -54,9 +68,14 @@ def main():
   def action(x, y):
     xname = f'{x.parent.name}/{x.stem}'
     yname = f'{y.parent.name}/{y.stem}'
-    x = Image.open(x)
-    y = Image.open(y)
-    assert x.width == y.width and x.height == y.height, "Image size mismatch!"
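+    # Forcing RGB keeps grayscale/palette inputs comparable: PIL decodes
+    # them to 3-channel arrays before the metrics are computed.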
+ x = Image.open(x).convert('RGB') + y = Image.open(y).convert('RGB') + if x.width != y.width or x.height != y.height: + # print(f"Image size mismatch {x.width}x{x.height} != {y.width}x{y.height}") + min_w = min(x.width, y.width) + min_h = min(x.height, y.height) + x = x.crop([0, 0, min_w, min_h]) + y = y.crop([0, 0, min_w, min_h]) xx = np.asarray(x, dtype=np.float) / 255.0 yy = np.asarray(y, dtype=np.float) / 255.0 if FLAGS.l_only: @@ -69,14 +88,14 @@ def action(x, y): psnr = np.log10(1.0 / mse) * 10.0 info = {"x": xname, "y": yname} if FLAGS.ssim: - ssim = compare_ssim(xx, yy, multichannel=True) + ssim = structural_similarity(xx, yy, multichannel=True) info.update(SSIM=ssim) info.update(PSNR=psnr) info.update(MSE=mse) return info if FLAGS.multithread: - pool = mp.pool.ThreadPool() + pool = ThreadPool() results = [pool.apply_async(action, (i, j)) for i, j in gen()] with tqdm.tqdm(results) as r: for info in r: From ad66a634162390c4d952d32f7e95c4d21b2890f6 Mon Sep 17 00:00:00 2001 From: Wenyi Tang Date: Tue, 28 Jul 2020 13:40:00 +0800 Subject: [PATCH 10/12] Add depth=1 in cubic config --- CHANGELOG.md | 3 ++- Train/par/pytorch/cubic.yml | 1 + VSR/Backend/Torch/Models/Bicubic.py | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb79e84..cf54d9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,12 @@ 1.0.6 ## 1.0.6 -## 2020-06 +## 2020-07 - Update TF backend - Add support to tensorflow 2.0 (both legacy and eager mode) - Refactor torch backend models - Add `--caching_dataset` to cache transformed data into memory (ignored when `memory_limit` set). +- Fix FastMetrics multi-threads issue ## 1.0.5 ## 2020-05 diff --git a/Train/par/pytorch/cubic.yml b/Train/par/pytorch/cubic.yml index 58ac9ba..ef3a07d 100644 --- a/Train/par/pytorch/cubic.yml +++ b/Train/par/pytorch/cubic.yml @@ -1,5 +1,6 @@ cubic: scale: 4 channel: 3 + depth: 1 batch_shape: [16, 3, 32, 32] diff --git a/VSR/Backend/Torch/Models/Bicubic.py b/VSR/Backend/Torch/Models/Bicubic.py index b8d43a1..c1045f6 100644 --- a/VSR/Backend/Torch/Models/Bicubic.py +++ b/VSR/Backend/Torch/Models/Bicubic.py @@ -21,6 +21,8 @@ def __init__(self, scale): self.scale = scale def forward(self, x): + if self.scale == 1: + return x ret = [] for img in [i[0] for i in x.split(1, dim=0)]: img = self.to_pil(img.cpu()) From d36652854a30f98761ec78c5cb1eb9345c612d52 Mon Sep 17 00:00:00 2001 From: Wenyi Tang Date: Wed, 29 Jul 2020 14:01:40 +0800 Subject: [PATCH 11/12] Fix Torch.Models.Model.BasicModel can't init twice --- CHANGELOG.md | 2 +- VSR/Backend/Torch/Models/Model.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf54d9c..12fb2a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -1.0.6 +1.0.6.1 ## 1.0.6 ## 2020-07 diff --git a/VSR/Backend/Torch/Models/Model.py b/VSR/Backend/Torch/Models/Model.py index 4c6c326..2944061 100644 --- a/VSR/Backend/Torch/Models/Model.py +++ b/VSR/Backend/Torch/Models/Model.py @@ -20,14 +20,18 @@ class BasicModel(object): - opts: contains a K-V pair of `str: optim.Optimizer`. Will be automatically appended if a derived object assign any attribute with `optim.Optimizer`. 
""" - modules = OrderedDict() - opts = OrderedDict() - name = '' - loaded = None - _trainer = None + + def _setup(self): + self.setup = True + self.modules = OrderedDict() + self.opts = OrderedDict() + self.name = '' + self.loaded = None def __setattr__(self, key, value): - if key in ('modules', 'opts',): + if not hasattr(self, 'setup') and key != 'setup': + self._setup() + if key in ('modules', 'opts', 'setup'): if hasattr(self, key): raise ValueError(f"Can't overwrite built-in '{key}' of BasicModel") if isinstance(value, torch.nn.Module): From 37c17bcebc925c08077f8e7a05498df1555ca6f4 Mon Sep 17 00:00:00 2001 From: Wenyi Tang Date: Wed, 29 Jul 2020 14:28:35 +0800 Subject: [PATCH 12/12] Fix module import path --- Tests/motion_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/motion_test.py b/Tests/motion_test.py index e7f03a3..5513183 100644 --- a/Tests/motion_test.py +++ b/Tests/motion_test.py @@ -7,7 +7,7 @@ if not os.getcwd().endswith('Tests'): os.chdir('Tests') from VSR.Backend.TF.Framework import Motion as M -from VSR.Backend.Torch.Models.video import motion as MT +from VSR.Backend.Torch.Models.Ops import Motion as MT from VSR.DataLoader.FloDecoder import open_flo, KITTI import tensorflow as tf