RLE-Foundation · Zach-Attach · Oct 29, 2024
diff --git a/rllte/xplore/reward/disagreement.py b/rllte/xplore/reward/disagreement.py
@@ -54,7 +54,7 @@ class Disagreement(BaseReward):
         batch_size (int): The batch size for training.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of Disagreement.

diff --git a/rllte/xplore/reward/e3b.py b/rllte/xplore/reward/e3b.py
@@ -55,7 +55,7 @@ class E3B(BaseReward):
         batch_size (int): The batch size for training.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of E3B.

diff --git a/rllte/xplore/reward/icm.py b/rllte/xplore/reward/icm.py
@@ -54,7 +54,7 @@ class ICM(BaseReward):
         batch_size (int): The batch size for training.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of ICM.

diff --git a/rllte/xplore/reward/model.py b/rllte/xplore/reward/model.py
@@ -36,6 +36,12 @@ def orthogonal_layer_init(layer, std=np.sqrt(2), bias_const=0.0):
     th.nn.init.constant_(layer.bias, bias_const)
     return layer
 
+def kaiming_he_init(layer):  # Kaiming (He) normal init for the weight, zeros for the bias; returns the layer.
+    th.nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')  # in-place; gain is derived for ReLU
+    if layer.bias is not None:  # skip layers that carry no bias term
+        th.nn.init.zeros_(layer.bias)
+    return layer  # same object that was passed in, now re-initialised
+
 def default_layer_init(layer):
     stdv = 1. / math.sqrt(layer.weight.size(1))
     layer.weight.data.uniform_(-stdv, stdv)
@@ -49,6 +55,8 @@ class ObservationEncoder(nn.Module):
     Args:
         obs_shape (Tuple): The data shape of observations.
         latent_dim (int): The dimension of encoding vectors.
+        encoder_model (str): The network architecture of the encoder from ['mnih', 'espeholt']. Defaults to 'mnih'.
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Defaults to 'default'.
 
     Returns:
         Encoder instance.
@@ -59,6 +67,8 @@ def __init__(self, obs_shape: Tuple, latent_dim: int, encoder_model:str = "mnih"
 
         if weight_init == "orthogonal":
             init_ = orthogonal_layer_init
+        elif weight_init == "kaiming he":
+            init_ = kaiming_he_init
         elif weight_init == "default":
             init_ = default_layer_init
         else:

diff --git a/rllte/xplore/reward/ngu.py b/rllte/xplore/reward/ngu.py
@@ -56,6 +56,8 @@ class NGU(Fabric):
         sm (float): The kernel maximum similarity.
         mrs (float): The maximum reward scaling.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
+        encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of NGU.

diff --git a/rllte/xplore/reward/pseudo_counts.py b/rllte/xplore/reward/pseudo_counts.py
@@ -60,7 +60,7 @@ class PseudoCounts(BaseReward):
         sm (float): The kernel maximum similarity.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of PseudoCounts.

diff --git a/rllte/xplore/reward/re3.py b/rllte/xplore/reward/re3.py
@@ -50,7 +50,7 @@ class RE3(BaseReward):
         k (int): Use the k-th neighbors.
         average_entropy (bool): Use the average of entropy estimation.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of RE3.

diff --git a/rllte/xplore/reward/ride.py b/rllte/xplore/reward/ride.py
@@ -60,7 +60,7 @@ class RIDE(BaseReward):
         sm (float): The kernel maximum similarity.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of RIDE.

diff --git a/rllte/xplore/reward/rnd.py b/rllte/xplore/reward/rnd.py
@@ -53,7 +53,7 @@ class RND(BaseReward):
         batch_size (int): The batch size for training.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of RND.