diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml
new file mode 100644
index 0000000000..fbb09ae75d
--- /dev/null
+++ b/.github/workflows/deploy_docs.yml
@@ -0,0 +1,37 @@
+# Builds the HTML documentation under docs/ and publishes docs/build/html
+# to the gh-pages branch. Runs only when triggered manually (UI or API).
+
+name: Deploy docs
+
+on:
+  # Manual trigger only; the workflow takes no inputs.
+  workflow_dispatch:
+
+jobs:
+  deploy-docs:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python 3.9
+      uses: actions/setup-python@v2
+      with:
+        # Quoted so YAML keeps it a string (an unquoted 3.10 would load as 3.1).
+        python-version: "3.9"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install --upgrade setuptools
+        pip install -e .
+        pip install -r docs/requirements.txt
+    - name: Build docs
+      run: |
+        # Fail the step on the first error so the build is checked.
+        set -e
+        cd docs/ && make html && cd ..
+    - name: Deploy docs
+      uses: JamesIves/github-pages-deploy-action@3.7.1
+      with:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        BRANCH: gh-pages
+        FOLDER: docs/build/html
+        CLEAN: true
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index efed46a06f..feabcddab0 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -9,10 +9,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - name: Set up Python 3.8
+    - name: Set up Python 3.9
       uses: actions/setup-python@v2
       with:
-        python-version: 3.8
+        python-version: "3.9"  # quoted: an unquoted 3.10 would load as 3.1
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
@@ -35,10 +35,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - name: Set up Python 3.8
+    - name: Set up Python 3.9
       uses: actions/setup-python@v2
       with:
-        python-version: 3.8
+        python-version: "3.9"  # quoted: an unquoted 3.10 would load as 3.1
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/onmt/modules/moe.py b/onmt/modules/moe.py
index f356130d97..a8ceba6cde 100644
--- a/onmt/modules/moe.py
+++ b/onmt/modules/moe.py
@@ -60,7 +60,9 @@ def forward(self, x):
         y = torch.empty_like(x)
         for i, expert in enumerate(self.experts):
             if torch.any(flat_expert_indices == i):
-                y[flat_expert_indices == i] = expert(x[flat_expert_indices == i])
+                y[flat_expert_indices == i] = expert(
+                    x[flat_expert_indices == i].unsqueeze(0)
+                )
         y = (y.view(*expert_weights.shape, -1) * expert_weights.unsqueeze(-1)).sum(
             dim=1
         )
diff --git a/onmt/modules/rmsnorm.py b/onmt/modules/rmsnorm.py
index a25d08b27e..fb8cc29065 100644
--- a/onmt/modules/rmsnorm.py
+++ b/onmt/modules/rmsnorm.py
@@ -4,11 +4,11 @@
 import torch.nn as nn
 
 try:
-    import awq_inference_engine
+    import awq_ext
 
-    AWQ_INFERENCE_ENGINE = True
+    AWQ_EXT = True
 except ImportError:
-    AWQ_INFERENCE_ENGINE = False
+    AWQ_EXT = False
 
 
 class RMSNorm(torch.nn.Module):
@@ -24,12 +24,12 @@ def __init__(self, hidden_size: int, eps: float = 1e-6):
         self.weight = nn.Parameter(torch.ones(hidden_size))
 
     def forward(self, hidden_states):
-        if AWQ_INFERENCE_ENGINE and not self.training:
+        if AWQ_EXT and not self.training:
             inp_type = hidden_states.dtype
             output = torch.empty_like(hidden_states).to(inp_type)
             if hidden_states.dim() == 2:  # patch for multi experts
                 hidden_states = hidden_states.unsqueeze(0)
-            awq_inference_engine.layernorm_forward_cuda(
+            awq_ext.layernorm_forward_cuda(
                 hidden_states.half(), self.weight.half(), output.half(), self.eps
             )
             if hidden_states.dim() == 2:  # patch for multi experts