Add test code for Python implementation of transformer #1

Draft · wants to merge 4 commits into base: master

18 changes: 18 additions & 0 deletions README.md
# Tests

## Python implementation tests
To run the tests for the Python implementation of the transformer, execute the following commands:

```
cd src/python
python -m unittest test_transformer.py
```
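
Passing `-v` prints one line per test, and `unittest` discovery will also pick the file up by its `test_` prefix without naming it explicitly:

```
cd src/python
python -m unittest -v test_transformer
python -m unittest discover -s . -p "test_*.py"
```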

## C implementation tests
To run the tests for the C implementation of the transformer, execute the following commands:

```
cd src/c
./test_transformer
```
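
The test binary is not produced by this PR (there is no Makefile yet; see the TODO below), so it has to be compiled by hand first. A minimal sketch, assuming the C implementation lives in `src/c/transformer.c` next to `transformer.h` (`-lm` is added in case the implementation uses `math.h` routines):

```
cd src/c
cc -o test_transformer transformer.c test_transformer.c -lm
./test_transformer
```

Because the checks use `assert()`, the tests must not be compiled with `-DNDEBUG`, or they silently become no-ops.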

# TODO:
- add a top-level Makefile to build the library and the C test executable
- add config loading
64 changes: 64 additions & 0 deletions src/c/test_transformer.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "transformer.h"

/* Self-attention should map an (n_seq x dim_embedding) input
 * to an (n_seq x dim_internal) output. */
void test_self_attention() {
    struct Transformer *trfm = init_transformer();
    assert(trfm);

    Tensor x, z;
    x = MatDim(trfm->n_seq, trfm->dim_embedding);
    MatFill(x, 4);

    z = self_attention(&trfm->sa[0], x);

    assert(MatRows(z) == trfm->n_seq);
    assert(MatCols(z) == trfm->dim_internal);

    MatUnDim(x);
    MatUnDim(z);
}

/* Multi-head attention should preserve the (n_seq x dim_embedding) shape. */
void test_multi_head_attention() {
    struct Transformer *trfm = init_transformer();
    assert(trfm);

    Tensor x, z;
    x = MatDim(trfm->n_seq, trfm->dim_embedding);
    MatFill(x, 4);

    z = multi_head_attention(trfm, x);

    assert(MatRows(z) == trfm->n_seq);
    assert(MatCols(z) == trfm->dim_embedding);

    MatUnDim(x);
    MatUnDim(z);
}

/* A full transformer block should preserve the (n_seq x dim_embedding) shape. */
void test_transformer_block() {
    struct Transformer *trfm = init_transformer();
    assert(trfm);

    Tensor x, z;
    x = MatDim(trfm->n_seq, trfm->dim_embedding);
    MatFill(x, 4);

    z = transformer_block(trfm, x);

    assert(MatRows(z) == trfm->n_seq);
    assert(MatCols(z) == trfm->dim_embedding);

    MatUnDim(x);
    MatUnDim(z);
}

int main() {
    test_self_attention();
    test_multi_head_attention();
    test_transformer_block();

    printf("All tests passed.\n");
    return 0;
}
50 changes: 50 additions & 0 deletions src/python/test_transformer.py
import unittest

import torch

from transformer_simple import SelfAttention_naive, MultiHeadAttention_naive, TransformerBlock_naive


class TestSelfAttentionNaive(unittest.TestCase):
    def setUp(self):
        self.dim_emb = 4
        self.dim_internal = 3
        self.heads = 8
        self.n_seq = 2
        self.dtype = torch.float32
        self.model = SelfAttention_naive(self.dim_emb, self.dim_internal, self.heads, dtype=self.dtype)
        self.x = torch.ones([1, self.n_seq, self.dim_emb], dtype=self.dtype)

    def test_forward(self):
        # Self-attention maps the embedding dimension to dim_internal.
        output = self.model(self.x)
        self.assertEqual(output.shape, (1, self.n_seq, self.dim_internal))


class TestMultiHeadAttentionNaive(unittest.TestCase):
    def setUp(self):
        self.dim_emb = 4
        self.dim_internal = 3
        self.heads = 8
        self.n_seq = 2
        self.dtype = torch.float32
        self.model = MultiHeadAttention_naive(self.n_seq, self.dim_emb, self.dim_internal, self.heads, dtype=self.dtype)
        self.x = torch.ones([1, self.n_seq, self.dim_emb], dtype=self.dtype)

    def test_forward(self):
        # Multi-head attention preserves the embedding dimension.
        output = self.model(self.x)
        self.assertEqual(output.shape, (1, self.n_seq, self.dim_emb))


class TestTransformerBlockNaive(unittest.TestCase):
    def setUp(self):
        self.dim_emb = 4
        self.dim_internal = 3
        self.heads = 8
        self.n_seq = 2
        self.dtype = torch.float32
        self.model = TransformerBlock_naive(self.n_seq, self.dim_emb, self.dim_internal, self.heads, dtype=self.dtype)
        self.x = torch.ones([1, self.n_seq, self.dim_emb], dtype=self.dtype)

    def test_forward(self):
        # The transformer block preserves the embedding dimension.
        output = self.model(self.x)
        self.assertEqual(output.shape, (1, self.n_seq, self.dim_emb))


if __name__ == '__main__':
    unittest.main()
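
The unit tests above only assert output shapes. A small extension, sketched here under the assumption that the constructors keep the signatures used in `setUp` above, would also check that the forward pass returns finite values:

```
import unittest
import torch
from transformer_simple import TransformerBlock_naive

class TestTransformerBlockFinite(unittest.TestCase):
    def test_forward_is_finite(self):
        # Same hyperparameters as the shape tests above.
        model = TransformerBlock_naive(n_seq=2, dim_emb=4, dim_internal=3, heads=8, dtype=torch.float32)
        x = torch.ones([1, 2, 4], dtype=torch.float32)
        output = model(x)
        # No NaN or Inf should appear in the output.
        self.assertTrue(torch.isfinite(output).all().item())

if __name__ == '__main__':
    unittest.main()
```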
40 changes: 20 additions & 20 deletions src/python/transformer_simple.py
@@ -22,16 +22,16 @@ def init_weight_zavier(x):
     nn.init.constant_(x.bias, 0)
 
 class SelfAttention_naive(nn.Module):
-    def __init__(self, dim_emb, dim_internal, heads=8, mask=False, dropout=0.0, dtype=torch.float32):
-        """
-        A single self attention block
+    """
+    A single self attention block
 
-        :param dim_emb: embedding dimension
-        :param dim_internal: dimension of internal representation, usually the same as dim_emb
-        :param head: number of multi head
-        :param mask
+    :param dim_emb: embedding dimension
+    :param dim_internal: dimension of internal representation, usually the same as dim_emb
+    :param head: number of multi head
+    :param mask
 
-        """
+    """
+    def __init__(self, dim_emb, dim_internal, heads=8, mask=False, dropout=0.0, dtype=torch.float32):
         super().__init__()
 
         self.dim_emb = dim_emb
@@ -69,17 +69,17 @@ def forward(self, x):
 
 
 class MultiHeadAttention_naive(nn.Module):
-    def __init__(self, n_seq, dim_emb, dim_internal, heads=8, mask=False, dropout=0.0, dtype=torch.float32):
-        """
-        multi head attention block
+    """
+    multi head attention block
 
-        :param n_seq: number of token sequence
-        :param dim_emb: embedding dimension
-        :param dim_internal: dimension of internal representation, usually the same as dim_emb
-        :param head: number of multi head
-        :param mask
+    :param n_seq: number of token sequence
+    :param dim_emb: embedding dimension
+    :param dim_internal: dimension of internal representation, usually the same as dim_emb
+    :param head: number of multi head
+    :param mask
 
-        """
+    """
+    def __init__(self, n_seq, dim_emb, dim_internal, heads=8, mask=False, dropout=0.0, dtype=torch.float32):
         super().__init__()
 
         self.n_seq = n_seq
@@ -104,10 +104,10 @@ def forward(self, x):
 
 
 class TransformerBlock_naive(nn.Module):
-    """
-    :ff_hidden_mult: number of multiples of embedding for total hidden size
-    """
     def __init__(self, n_seq, dim_emb, dim_internal, heads=8, mask=False, ff_hidden_mult=4, dropout=0.0, dtype=torch.float32):
+        """
+        :ff_hidden_mult: number of multiples of embedding for total hidden size
+        """
         super().__init__()
 
         self.mha = MultiHeadAttention_naive(n_seq=n_seq, dim_emb=dim_emb, dim_internal=dim_internal, heads=heads, mask=mask, dropout=dropout, dtype=dtype)