From 04d25bc648ab5285f9da8fc2fc1db85943448129 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Thu, 21 Nov 2024 17:01:01 +0100
Subject: [PATCH 01/21] tweak approximation for OpenELM-270 PPF

---
 .travis/test-llm.sh | 24 +++++++++++++++---------
 cli/src/main.rs     |  6 ++++++
 cli/src/params.rs   | 21 +++++++++++++--------
 data/src/tensor.rs  | 17 ++++++++++++++++-
 4 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 434ddbcb87..80cfe6b5cc 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -28,14 +28,6 @@ case $model in
         ;;
 esac
 
-case $q in
-    q40f16) approx=ultra;;
-    q40ef16) approx=ultra;;
-    f16f16) approx=ultra;;
-    q40f32) approx=very;;
-    q40ef32) approx=very;;
-    f32f32) approx=approximate;;
-esac
 
 nnef=llm/$generation/$id/$id.nnef.tgz
 
@@ -58,8 +50,22 @@ for t in p0s100 p50s50 p99s1
 do
     npz=llm/$generation/$id/$id.$t.io.npz
     $CACHE_FILE $npz
+
+    case $q in
+        q40f16) approx="--approx ultra";;
+        q40ef16) approx="--approx ultra";;
+        f16f16) approx="--approx ultra";;
+        q40f32) approx="--approx very";;
+        q40ef32) approx="--approx very";;
+        f32f32) approx="--approx approximate";;
+    esac
+
+    case "$id.$t" in 
+        apple--OpenELM-270M-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.003"
+    esac
+
     $TRACT_RUN -v --nnef-tract-core $MODELS/$nnef -O run \
         --input-from-npz $MODELS/$npz \
         --assert-output-bundle $MODELS/$npz \
-        --approx $approx --allow-float-casts
+        $approx --allow-float-casts
 done
diff --git a/cli/src/main.rs b/cli/src/main.rs
index b1baff6734..0c2c44cf31 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -422,6 +422,12 @@ fn assertions_options(command: clap::Command) -> clap::Command {
             .long("approx")
             .help("Approximation level used in assertions."),
             )
+        .arg(
+            Arg::new("approx-custom")
+            .takes_value(true)
+            .long("approx-custom")
+            .help("Approximation level used in assertions (atol, rtol, outlier ratio). 3 coma-separated floats."),
+            )
         .arg(
             Arg::new("assert-output")
             .takes_value(true)
diff --git a/cli/src/params.rs b/cli/src/params.rs
index db8f744008..8f108ad994 100644
--- a/cli/src/params.rs
+++ b/cli/src/params.rs
@@ -1136,14 +1136,19 @@ impl Assertions {
                     .collect()
             });
         let allow_missing_outputs = sub.is_present("allow-missing-outputs");
-        let approximation = match sub.value_of("approx").unwrap() {
-            "exact" => Approximation::Exact,
-            "close" => Approximation::Close,
-            "approximate" => Approximation::Approximate,
-            "very" => Approximation::VeryApproximate,
-            "super" => Approximation::SuperApproximate,
-            "ultra" => Approximation::UltraApproximate,
-            _ => panic!(),
+        let approximation = if let Some(custom) = sub.value_of("approx-custom") {
+            let Some((atol, rtol, approx)) = custom.split(",").collect_tuple() else { bail!("Can't parse approx custom. It should look like 0.001,0.002,0.003") };
+            Approximation::Custom(atol.parse()?, rtol.parse()?, approx.parse()?)
+        } else {
+            match sub.value_of("approx").unwrap() {
+                "exact" => Approximation::Exact,
+                "close" => Approximation::Close,
+                "approximate" => Approximation::Approximate,
+                "very" => Approximation::VeryApproximate,
+                "super" => Approximation::SuperApproximate,
+                "ultra" => Approximation::UltraApproximate,
+                _ => panic!(),
+            }
         };
         Ok(Assertions {
             assert_outputs,
diff --git a/data/src/tensor.rs b/data/src/tensor.rs
index c294098ce5..0f6b4d4381 100644
--- a/data/src/tensor.rs
+++ b/data/src/tensor.rs
@@ -20,7 +20,7 @@ use std::sync::Arc;
 pub mod litteral;
 pub mod view;
 
-#[derive(Copy, Clone, Default, PartialEq, Eq, Debug)]
+#[derive(Copy, Clone, Default, Debug)]
 pub enum Approximation {
     Exact,
     #[default]
@@ -29,8 +29,22 @@ pub enum Approximation {
     VeryApproximate,
     SuperApproximate,
     UltraApproximate,
+    Custom(f32, f32, f32),
 }
 
+impl PartialEq for Approximation {
+    fn eq(&self, other: &Self) -> bool {
+        use Approximation::Custom;
+        if let (Custom(aa, ar, ao), Custom(ba, br, bo)) = (self, other) {
+            aa == ba && ar == br && bo == ao
+        } else {
+            std::mem::discriminant(self) == std::mem::discriminant(other)
+        }
+    }
+}
+
+impl Eq for Approximation {}
+
 impl From<bool> for Approximation {
     fn from(b: bool) -> Self {
         if b {
@@ -54,6 +68,7 @@ impl Approximation {
             (VeryApproximate, _) => (5e-2, 1e-2, 0.0),
             (SuperApproximate, _) => (0.1, 0.05, 0.0001),
             (UltraApproximate, _) => (0.2, 0.1, 0.0005),
+            (Custom(atol, rtol, out), _) => (*atol as _, *rtol as _, *out as _),
         }
     }
 }

From 013b9b7ace6d92472b386807e7ba7f73bfc1abec Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Fri, 22 Nov 2024 08:46:14 +0100
Subject: [PATCH 02/21] custom approx for openelm and tynillama

---
 .travis/test-llm.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 80cfe6b5cc..d53535756e 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -61,7 +61,10 @@ do
     esac
 
     case "$id.$t" in 
-        apple--OpenELM-270M-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.003"
+        apple--OpenELM-270M-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.003";;
+        TinyLlama--TinyLlama_v1.1-f16f16.p0s100) approx="--approx-custom 0.2,0.1,0.001";;
+        TinyLlama--TinyLlama_v1.1-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.005";;
+        TinyLlama--TinyLlama_v1.1-f16f16.p99s1) approx="--approx-custom 0.2,0.1,0.004";;
     esac
 
     $TRACT_RUN -v --nnef-tract-core $MODELS/$nnef -O run \

From d7db6a38f57dab217fcdcad22414e001857d95f7 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Fri, 22 Nov 2024 09:45:26 +0100
Subject: [PATCH 03/21] phi-1.5 do not have ppf

---
 .travis/test-llm.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index d53535756e..ddb6d7ad7a 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -28,7 +28,6 @@ case $model in
         ;;
 esac
 
-
 nnef=llm/$generation/$id/$id.nnef.tgz
 
 $CACHE_FILE $nnef
@@ -67,6 +66,7 @@ do
         TinyLlama--TinyLlama_v1.1-f16f16.p99s1) approx="--approx-custom 0.2,0.1,0.004";;
     esac
 
+
     $TRACT_RUN -v --nnef-tract-core $MODELS/$nnef -O run \
         --input-from-npz $MODELS/$npz \
         --assert-output-bundle $MODELS/$npz \

From 9c89d19bbfa527b4ab3df0d8e22a1e9cd7ef1a42 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Fri, 22 Nov 2024 11:37:16 +0100
Subject: [PATCH 04/21] tweaks for tinyllama

---
 .travis/cache_file.sh | 9 ++++++---
 .travis/test-llm.sh   | 3 +++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/.travis/cache_file.sh b/.travis/cache_file.sh
index e978bc5876..8c62dccc89 100755
--- a/.travis/cache_file.sh
+++ b/.travis/cache_file.sh
@@ -12,9 +12,12 @@ cd $CACHEDIR
 for file in $@
 do
     mkdir -p $(dirname $file)
-    [ -e $file ] \
-        || wget --no-verbose https://s3.amazonaws.com/tract-ci-builds/tests/$file -O $file \
-        || aws s3 cp s3://tract-ci-builds/tests/$file $file
+    if [ ! -e $file ]
+    then
+        wget --no-verbose https://s3.amazonaws.com/tract-ci-builds/tests/$file -O $file.tmp \
+        || aws s3 cp s3://tract-ci-builds/tests/$file $file.tmp
+        mv $file.tmp $file
+    fi
 done
 
 exit 0
diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index ddb6d7ad7a..2c092753e4 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -64,6 +64,9 @@ do
         TinyLlama--TinyLlama_v1.1-f16f16.p0s100) approx="--approx-custom 0.2,0.1,0.001";;
         TinyLlama--TinyLlama_v1.1-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.005";;
         TinyLlama--TinyLlama_v1.1-f16f16.p99s1) approx="--approx-custom 0.2,0.1,0.004";;
+        TinyLlama--TinyLlama_v1.1-q40f16.p0s100) approx="--approx-custom 0.2,0.1,0.004";;
+        TinyLlama--TinyLlama_v1.1-q40f16.p99s1) approx="--approx-custom 0.2,0.1,0.002";;
+        TinyLlama--TinyLlama_v1.1-q40f16.p50s50) approx="--approx-custom 0.2,0.1,0.004";;
     esac
 
 

From c97b0b27e8915644e5cfc9c696ff9390dd496374 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <kali@zoy.org>
Date: Tue, 7 Jan 2025 15:23:00 +0100
Subject: [PATCH 05/21] introduce llama 3.2 3B in bench

---
 .travis/test-llm.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 2c092753e4..fea264f947 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -22,6 +22,7 @@ case $model in
     OpenELM-1_1B) id=apple--OpenELM-1_1B-$q;;
     TinyLlama_v1.1) id=TinyLlama--TinyLlama_v1.1-$q;;
     phi-1_5) id=microsoft--phi-1_5-$q;;
+    llama-3.2) id=meta-llama--Llama-3.2-3B-$q;;
     *)
         echo "Unknown model"
         exit 2

From 8824d0b8dd8cfaddc0f1e8a84d6cce98b8d3000a Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <kali@zoy.org>
Date: Tue, 7 Jan 2025 15:48:52 +0100
Subject: [PATCH 06/21] exception for Tiny f32

---
 .travis/test-llm.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index fea264f947..d4267f2b70 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -62,6 +62,7 @@ do
 
     case "$id.$t" in 
         apple--OpenELM-270M-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.003";;
+        TinyLlama--TinyLlama_v1.1-f32f32.p50s50) approx="--approx-custom 0.2,0.1,0.001";;
         TinyLlama--TinyLlama_v1.1-f16f16.p0s100) approx="--approx-custom 0.2,0.1,0.001";;
         TinyLlama--TinyLlama_v1.1-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.005";;
         TinyLlama--TinyLlama_v1.1-f16f16.p99s1) approx="--approx-custom 0.2,0.1,0.004";;

From 3797f3c5eaedde0d22221db0d13bbe06a629a993 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <kali@zoy.org>
Date: Tue, 7 Jan 2025 15:52:17 +0100
Subject: [PATCH 07/21] add llama

---
 .github/workflows/llm.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llm.yml b/.github/workflows/llm.yml
index d37ba9b24d..7ee4e85e42 100644
--- a/.github/workflows/llm.yml
+++ b/.github/workflows/llm.yml
@@ -32,7 +32,7 @@ jobs:
     strategy:
       matrix:
         os: [ macos, ubuntu ]
-        model: [ OpenELM-270M, OpenELM-1_1B, TinyLlama_v1.1, phi-1_5 ]
+        model: [ OpenELM-270M, OpenELM-1_1B, TinyLlama_v1.1, phi-1_5, llama-3.2 ]
         q: [ f16f16, f32f32, q40f16, q40f32, q40ef16, q40ef32 ]
       fail-fast: false
     permissions:

From cddd0f7fd0d5acfa660350500b40c80cfda321db Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <kali@zoy.org>
Date: Tue, 7 Jan 2025 16:25:29 +0100
Subject: [PATCH 08/21] skip llama 3.2 3B f32 test on gha

---
 .travis/test-llm.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index d4267f2b70..9013f3dc39 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -17,6 +17,7 @@ model=$1
 q=$2
 generation=current
 
+
 case $model in
     OpenELM-270M) id=apple--OpenELM-270M-$q;;
     OpenELM-1_1B) id=apple--OpenELM-1_1B-$q;;
@@ -29,6 +30,16 @@ case $model in
         ;;
 esac
 
+if [ -n "$GITHUB_ACTIONS" ]
+then
+    if [ "$id" =  meta-llama--Llama-3.2-3B-f32f32 ]
+    then
+        echo "::warning title=Untestable model::This model is too big for GHA..."
+        exit 0
+    fi
+fi
+
+
 nnef=llm/$generation/$id/$id.nnef.tgz
 
 $CACHE_FILE $nnef

From 5d2998a3c45bf69dfa819b37106b4c13fe9880a7 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <kali@zoy.org>
Date: Tue, 7 Jan 2025 16:34:20 +0100
Subject: [PATCH 09/21] tweak for llama 3 f16

---
 .travis/test-llm.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 9013f3dc39..e1c3c9e06e 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -80,6 +80,7 @@ do
         TinyLlama--TinyLlama_v1.1-q40f16.p0s100) approx="--approx-custom 0.2,0.1,0.004";;
         TinyLlama--TinyLlama_v1.1-q40f16.p99s1) approx="--approx-custom 0.2,0.1,0.002";;
         TinyLlama--TinyLlama_v1.1-q40f16.p50s50) approx="--approx-custom 0.2,0.1,0.004";;
+        meta-llama--Llama-3.2-3B-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.002"
     esac
 
 

From d3ae6a2fb3b490519e1c06d45450bb54d8d28c50 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <kali@zoy.org>
Date: Tue, 7 Jan 2025 16:56:20 +0100
Subject: [PATCH 10/21] better error message for skipped model

---
 .travis/test-llm.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index e1c3c9e06e..742b7df15c 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -34,7 +34,7 @@ if [ -n "$GITHUB_ACTIONS" ]
 then
     if [ "$id" =  meta-llama--Llama-3.2-3B-f32f32 ]
     then
-        echo "::warning title=Untestable model::This model is too big for GHA..."
+        echo "::warning title=Untestable model::$id is too big for GHA..."
         exit 0
     fi
 fi

From 8cc20bc4bd5b4143f5edb435385ed2eb3b294fd7 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Wed, 8 Jan 2025 09:45:38 +0100
Subject: [PATCH 11/21] tiny on arm64 tweaks

---
 .travis/test-llm.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 742b7df15c..a6d33bef75 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -73,6 +73,7 @@ do
 
     case "$id.$t" in 
         apple--OpenELM-270M-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.003";;
+
         TinyLlama--TinyLlama_v1.1-f32f32.p50s50) approx="--approx-custom 0.2,0.1,0.001";;
         TinyLlama--TinyLlama_v1.1-f16f16.p0s100) approx="--approx-custom 0.2,0.1,0.001";;
         TinyLlama--TinyLlama_v1.1-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.005";;
@@ -80,6 +81,9 @@ do
         TinyLlama--TinyLlama_v1.1-q40f16.p0s100) approx="--approx-custom 0.2,0.1,0.004";;
         TinyLlama--TinyLlama_v1.1-q40f16.p99s1) approx="--approx-custom 0.2,0.1,0.002";;
         TinyLlama--TinyLlama_v1.1-q40f16.p50s50) approx="--approx-custom 0.2,0.1,0.004";;
+        TinyLlama--TinyLlama_v1.1-q40ef16.p0s100) approx="--approx-custom 0.2,0.1,0.002";;
+        TinyLlama--TinyLlama_v1.1-q40ef16.p50s50) approx="--approx-custom 0.2,0.1,0.002";;
+
         meta-llama--Llama-3.2-3B-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.002"
     esac
 

From f0b16baf74ea2b260616638d67f9d71e1568c80d Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Wed, 8 Jan 2025 10:02:31 +0100
Subject: [PATCH 12/21] tweaks Tiny f16f16

---
 .travis/test-llm.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index a6d33bef75..472ecd840e 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -75,8 +75,8 @@ do
         apple--OpenELM-270M-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.003";;
 
         TinyLlama--TinyLlama_v1.1-f32f32.p50s50) approx="--approx-custom 0.2,0.1,0.001";;
-        TinyLlama--TinyLlama_v1.1-f16f16.p0s100) approx="--approx-custom 0.2,0.1,0.001";;
-        TinyLlama--TinyLlama_v1.1-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.005";;
+        TinyLlama--TinyLlama_v1.1-f16f16.p0s100) approx="--approx-custom 0.2,0.1,0.002";;
+        TinyLlama--TinyLlama_v1.1-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.007";;
         TinyLlama--TinyLlama_v1.1-f16f16.p99s1) approx="--approx-custom 0.2,0.1,0.004";;
         TinyLlama--TinyLlama_v1.1-q40f16.p0s100) approx="--approx-custom 0.2,0.1,0.004";;
         TinyLlama--TinyLlama_v1.1-q40f16.p99s1) approx="--approx-custom 0.2,0.1,0.002";;

From 506f6212bd66353cb3ad068f06644f7d610a1e7a Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Fri, 10 Jan 2025 15:41:49 +0100
Subject: [PATCH 13/21] llama tweaks

---
 .travis/test-llm.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 472ecd840e..2037f34e14 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -84,7 +84,8 @@ do
         TinyLlama--TinyLlama_v1.1-q40ef16.p0s100) approx="--approx-custom 0.2,0.1,0.002";;
         TinyLlama--TinyLlama_v1.1-q40ef16.p50s50) approx="--approx-custom 0.2,0.1,0.002";;
 
-        meta-llama--Llama-3.2-3B-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.002"
+        meta-llama--Llama-3.2-3B-f16f16.p0s100) approx="--approx-custom 0.2,0.1,0.004";;
+        meta-llama--Llama-3.2-3B-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.002";;
     esac
 
 

From 33e5b3000189e561f2213cd5a3e6d0f81e872ad7 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <kali@zoy.org>
Date: Tue, 7 Jan 2025 17:46:47 +0100
Subject: [PATCH 14/21] introduce generic f32 kit

---
 linalg/src/generic/mmm.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/linalg/src/generic/mmm.rs b/linalg/src/generic/mmm.rs
index e967788dcf..9b6cf7b22d 100644
--- a/linalg/src/generic/mmm.rs
+++ b/linalg/src/generic/mmm.rs
@@ -391,6 +391,12 @@ MMMRustKernel! {kernel::<i32, 3, 2> => generic_i32_3x2<i32>(3,2)
 }
 
 pub fn plug(ops: &mut Ops) {
+    ops.mmm_kits.push(
+        MMMKit::new(F32, F32, F32, &f32::packing(4))
+            .with_native(generic_f32_4x1.mmm(), 0)
+            .with_native(generic_f32_4x4.mmm(), 0)
+            .with_generic_fallback(true),
+    );
     ops.mmm_kits.push(
         MMMKit::new(Q4_0, F32, F32, &pq40_r4())
             .with_native(generic_f32_4x1.mmm(), 4)

From 67e7d9a30abbbcd4c93030f39fcc36592877a8c5 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <kali@zoy.org>
Date: Thu, 16 Jan 2025 13:04:02 +0100
Subject: [PATCH 15/21] phi 1.5 is out

---
 .github/workflows/llm.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llm.yml b/.github/workflows/llm.yml
index 7ee4e85e42..9eb9d899eb 100644
--- a/.github/workflows/llm.yml
+++ b/.github/workflows/llm.yml
@@ -32,7 +32,7 @@ jobs:
     strategy:
       matrix:
         os: [ macos, ubuntu ]
-        model: [ OpenELM-270M, OpenELM-1_1B, TinyLlama_v1.1, phi-1_5, llama-3.2 ]
+        model: [ OpenELM-270M, OpenELM-1_1B, TinyLlama_v1.1, llama-3.2 ]
         q: [ f16f16, f32f32, q40f16, q40f32, q40ef16, q40ef32 ]
       fail-fast: false
     permissions:

From 0404b5a32a2fd310ca65b6882b88182ee381bbb3 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Thu, 16 Jan 2025 13:53:36 +0100
Subject: [PATCH 16/21] more potable model size lookup

---
 .travis/test-llm.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 2037f34e14..7f3791e0bd 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -46,9 +46,13 @@ $CACHE_FILE $nnef
 
 $TRACT_RUN -v --nnef-tract-core $MODELS/$nnef -O --readings dump -q
 rszmax=$(tail -1 readings.out | awk '{print $5}')
-limit=$(zcat $MODELS/$nnef | wc -c)
+limit=$(cat $MODELS/$nnef | gunzip | wc -c)
 ratio=$((rszmax * 100 / limit))
 
+echo "  ###########################################"
+echo "      RSZ max to model size ratio: ${ratio}%."
+echo "  ###########################################"
+
 if [ $ratio -gt 175 ]
 then
     echo "RSZ max is ${ratio}% the size of the unzipped model!"

From 021fd7feb8ba31585f59846faaf7bf4ce683de62 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Thu, 16 Jan 2025 14:13:40 +0100
Subject: [PATCH 17/21] relax constraints (a lot) to make llama pass

---
 .travis/test-llm.sh | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 7f3791e0bd..1a99ac472b 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -22,7 +22,6 @@ case $model in
     OpenELM-270M) id=apple--OpenELM-270M-$q;;
     OpenELM-1_1B) id=apple--OpenELM-1_1B-$q;;
     TinyLlama_v1.1) id=TinyLlama--TinyLlama_v1.1-$q;;
-    phi-1_5) id=microsoft--phi-1_5-$q;;
     llama-3.2) id=meta-llama--Llama-3.2-3B-$q;;
     *)
         echo "Unknown model"
@@ -88,8 +87,22 @@ do
         TinyLlama--TinyLlama_v1.1-q40ef16.p0s100) approx="--approx-custom 0.2,0.1,0.002";;
         TinyLlama--TinyLlama_v1.1-q40ef16.p50s50) approx="--approx-custom 0.2,0.1,0.002";;
 
-        meta-llama--Llama-3.2-3B-f16f16.p0s100) approx="--approx-custom 0.2,0.1,0.004";;
-        meta-llama--Llama-3.2-3B-f16f16.p50s50) approx="--approx-custom 0.2,0.1,0.002";;
+        meta-llama--Llama-3.2-3B-f16f16.p0s100) 
+            if [ `arch` = "arm64" ]
+            then
+                approx="--approx-custom 0.25,0.25,0.01"
+            else
+                approx="--approx-custom 0.2,0.1,0.004"
+            fi
+        ;;
+        meta-llama--Llama-3.2-3B-f16f16.p50s50) 
+            if [ `arch` = "arm64" ]
+            then
+                approx="--approx-custom 0.25,0.25,0.016"
+            else
+                approx="--approx-custom 0.2,0.1,0.004"
+            fi
+        ;;
     esac
 
 

From 9926add5350f7b2422b4a090752845ad917b9ffd Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Thu, 16 Jan 2025 14:36:11 +0100
Subject: [PATCH 18/21] spurious dbg

---
 .travis/test-llm.sh | 2 ++
 cli/src/params.rs   | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 1a99ac472b..5569670856 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -48,6 +48,8 @@ rszmax=$(tail -1 readings.out | awk '{print $5}')
 limit=$(cat $MODELS/$nnef | gunzip | wc -c)
 ratio=$((rszmax * 100 / limit))
 
+echo $rszmax — $limit — $ratio
+
 echo "  ###########################################"
 echo "      RSZ max to model size ratio: ${ratio}%."
 echo "  ###########################################"
diff --git a/cli/src/params.rs b/cli/src/params.rs
index 8f108ad994..28d9c25d49 100644
--- a/cli/src/params.rs
+++ b/cli/src/params.rs
@@ -710,7 +710,6 @@ impl Parameters {
                 dec = dec.stopping_at(steps.parse()?);
             }
             dec.optimize(&mut m)?;
-            dbg!("done dec opt");
             Ok(m)
         });
         #[cfg(not(feature = "pulse"))]

From 592849874e565889cb36358e56246c95ecaa556a Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Thu, 16 Jan 2025 14:52:28 +0100
Subject: [PATCH 19/21] ci memory check based on alloc as RSZmax is too chaotic

---
 .travis/test-llm.sh | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 5569670856..80056c8662 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -44,17 +44,15 @@ nnef=llm/$generation/$id/$id.nnef.tgz
 $CACHE_FILE $nnef
 
 $TRACT_RUN -v --nnef-tract-core $MODELS/$nnef -O --readings dump -q
-rszmax=$(tail -1 readings.out | awk '{print $5}')
-limit=$(cat $MODELS/$nnef | gunzip | wc -c)
-ratio=$((rszmax * 100 / limit))
-
-echo $rszmax — $limit — $ratio
+alloc_max=$(cat readings.out | tail -n +2 | awk '{print $10-$11}' | sort -n | tail -1)
+size=$(cat $MODELS/$nnef | gunzip | wc -c)
+ratio=$((alloc_max * 100 / size))
 
 echo "  ###########################################"
-echo "      RSZ max to model size ratio: ${ratio}%."
+echo "      Alloc max to model size ratio: ${ratio}%."
 echo "  ###########################################"
 
-if [ $ratio -gt 175 ]
+if [ $ratio -gt 125 ]
 then
     echo "RSZ max is ${ratio}% the size of the unzipped model!"
     exit 1

From d67cf0ff15a9236be98e8f5011762ae466b83f55 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Thu, 16 Jan 2025 15:32:16 +0100
Subject: [PATCH 20/21] support finding model size with a req

---
 .travis/test-llm.sh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 80056c8662..4a6f95ea55 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -44,8 +44,14 @@ nnef=llm/$generation/$id/$id.nnef.tgz
 $CACHE_FILE $nnef
 
 $TRACT_RUN -v --nnef-tract-core $MODELS/$nnef -O --readings dump -q
+if [ -e $MODELS/$nnef ]
+then
+    size=$(stat -c %s $MODELS/$nnef)
+else
+    size=$(curl -I $MODELS/$nnef | grep Content-Length | cut -d " " -f 2)
+fi
+
 alloc_max=$(cat readings.out | tail -n +2 | awk '{print $10-$11}' | sort -n | tail -1)
-size=$(cat $MODELS/$nnef | gunzip | wc -c)
 ratio=$((alloc_max * 100 / size))
 
 echo "  ###########################################"

From b2f76e6deb928479868e0d1860dfdd13566f1a6d Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol <mathieu.poumeyrol@sonos.com>
Date: Thu, 16 Jan 2025 16:51:55 +0100
Subject: [PATCH 21/21] tweak q40f16 and q40ef16 for tiny

---
 .travis/test-llm.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.travis/test-llm.sh b/.travis/test-llm.sh
index 4a6f95ea55..a0a8222b5b 100755
--- a/.travis/test-llm.sh
+++ b/.travis/test-llm.sh
@@ -93,7 +93,9 @@ do
         TinyLlama--TinyLlama_v1.1-q40ef16.p0s100) approx="--approx-custom 0.2,0.1,0.002";;
         TinyLlama--TinyLlama_v1.1-q40ef16.p50s50) approx="--approx-custom 0.2,0.1,0.002";;
 
-        meta-llama--Llama-3.2-3B-f16f16.p0s100) 
+        meta-llama--Llama-3.2-3B-f16f16.p0s100 |\
+        meta-llama--Llama-3.2-3B-q40f16.p0s100 |\
+        meta-llama--Llama-3.2-3B-q40ef16.p0s100) 
             if [ `arch` = "arm64" ]
             then
                 approx="--approx-custom 0.25,0.25,0.01"
@@ -101,7 +103,9 @@ do
                 approx="--approx-custom 0.2,0.1,0.004"
             fi
         ;;
-        meta-llama--Llama-3.2-3B-f16f16.p50s50) 
+        meta-llama--Llama-3.2-3B-f16f16.p50s50 |\
+        meta-llama--Llama-3.2-3B-q40f16.p50s50 |\
+        meta-llama--Llama-3.2-3B-q40ef16.p50s50) 
             if [ `arch` = "arm64" ]
             then
                 approx="--approx-custom 0.25,0.25,0.016"