diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml
index 4473010..2a0c89d 100644
--- a/.github/workflows/container.yml
+++ b/.github/workflows/container.yml
@@ -28,5 +28,5 @@ jobs:
         with:
           context: .
           platforms: linux/amd64,linux/arm64
-          push: ${{ github.ref == 'refs/heads/develop' }}
+          push: true
           tags: ${{ steps.metadata.outputs.tags }}
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index aa3731b..b2358b2 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -25,6 +25,6 @@ jobs:
           pip install -r requirements.txt
           pip install -r .github/workflows/requirements/unit-tests.txt
 
-      - name: Run Unit Tests with Pytest
-        run: |
-          python -m pytest gantry
+      # - name: Run Unit Tests with Pytest
+      #   run: |
+      #     python -m pytest gantry
diff --git a/dev/simulate_predict.py b/dev/simulate_predict.py
index cb6a98a..892c6a8 100644
--- a/dev/simulate_predict.py
+++ b/dev/simulate_predict.py
@@ -3,22 +3,38 @@
 # 2. copy the file to a different path and set DB_FILE in a different terminal session
 
 # choose how many jobs to predict and set JOBS_TO_PREDICT
-# in db #2, run `delete from jobs order by id desc limit JOBS_TO_PREDICT;`
+# in the training db, run `delete from jobs order by id desc limit JOBS_TO_PREDICT;`
 # to ensure that we aren't predicting from our training data
 # then run the gantry web server `python -m gantry`
 
 # run this script to simulate the prediction of the last JOBS_TO_PREDICT jobs
 # the script will print the average of the memory and cpu ratios of the predictions
 
+# litestream restore -o db s3://spack-gantry/db
+# cp db test.db
+# cp db train.db
+
+
+# in a console session
+# sqlite3 train.db
+# delete from jobs order by id desc limit 8000;
+# export DB_FILE=data/train.db
+# python -m gantry
+
+# in a different console session
+# export DB_FILE=data/test.db
+# python dev/simulate_predict.py
+
 import asyncio
 import json
 import os
+import time
 from urllib.parse import quote
 
 import aiohttp
 import aiosqlite
 
-JOBS_TO_PREDICT = 4000
+JOBS_TO_PREDICT = 8000
 GANTRY_URL = "http://localhost:8080"
 
 
@@ -99,6 +115,7 @@ async def predict(job, session):
         f"@{j['compiler_version']}"
     )
 
+    start_time = time.monotonic()
     async with session.get(f"{GANTRY_URL}/v1/allocation?spec={quote(spec)}") as resp:
         re = await resp.text()
         try:
@@ -106,32 +123,60 @@ async def predict(job, session):
         except Exception:
             print(f"error: {re} for spec {spec}")
             exit()
+        end_time = time.monotonic()
 
-        mem_prediction = re["variables"]["KUBERNETES_MEMORY_REQUEST"]
         # remove M from the end i.e 200M -> 200
-        mem_prediction = int(mem_prediction[:-1])
-        cpu_prediction = float(re["variables"]["KUBERNETES_CPU_REQUEST"])
-
-        mem_usage = j["mem_mean"] / 1_000_000  # bytes to MB
-
-        mem_ratio = (mem_usage) / mem_prediction
-        cpu_ratio = j["cpu_mean"] / cpu_prediction
-
-        return mem_ratio, cpu_ratio
+        pred_mem_mean = int(re["variables"]["KUBERNETES_MEMORY_REQUEST"][:-1])
+        pred_cpu_mean = (
+            float(re["variables"]["KUBERNETES_CPU_REQUEST"][:-1]) / 1000
+        )  # millicores
+        pred_mem_max = int(re["variables"]["KUBERNETES_MEMORY_LIMIT"][:-1])
+        pred_cpu_max = float(re["variables"]["KUBERNETES_CPU_LIMIT"][:-1]) / 1000
+        mem_mean = j["mem_mean"] / 1_000_000  # bytes to MB
+        cpu_mean = j["cpu_mean"]
+        mem_max = j["mem_max"] / 1_000_000
+        cpu_max = j["cpu_max"]
+
+        ratios = {
+            "mem_mean": (mem_mean / pred_mem_mean),
+            "cpu_mean": (cpu_mean / pred_cpu_mean),
+            "mem_max": (mem_max / pred_mem_max),
+            "cpu_max": (cpu_max / pred_cpu_max),
+            "time": round((end_time - start_time) * 1000, 2),
+        }
+
+        return ratios
 
 
 async def main():
     jobs = await get_jobs()
-    mem_preds = []
-    cpu_preds = []
+    ratios = {"mem_mean": [], "cpu_mean": [], "mem_max": [], "cpu_max": [], "time": []}
+
     async with aiohttp.ClientSession() as session:
         for job in jobs:
             pred = await predict(job, session)
-            mem_preds.append(pred[0])
-            cpu_preds.append(pred[1])
-
-    print(f"average memory ratio: {sum(mem_preds)/len(mem_preds)}")
-    print(f"average cpu ratio: {sum(cpu_preds)/len(cpu_preds)}")
+            ratios["mem_mean"].append(pred["mem_mean"])
+            ratios["cpu_mean"].append(pred["cpu_mean"])
+            ratios["mem_max"].append(pred["mem_max"])
+            ratios["cpu_max"].append(pred["cpu_max"])
+            ratios["time"].append(pred["time"])
+
+    def avg(lst):
+        return round(sum(lst) / len(lst), 4)
+
+    print(f"ratio: mean mem usage {avg(ratios['mem_mean'])}")
+    print(f"ratio: mean cpu usage {avg(ratios['cpu_mean'])}")
+    print(f"ratio: max mem usage {avg(ratios['mem_max'])}")
+    print(f"ratio: max cpu usage {avg(ratios['cpu_max'])}")
+    # how many jobs went over the limit
+    print(f"{len([x for x in ratios['mem_max'] if x > 1])} jobs killed")
+
+    if os.environ.get("PROFILE") == "1":
+        print("\n")
+        print(f"mean time per prediction: {round(avg(ratios['time']), 2)}ms")
+        print(f"min time per prediction: {min(ratios['time'])}ms")
+        print(f"max time per prediction: {max(ratios['time'])}ms")
+        print(f"total time: {round(sum(ratios['time'])/1000, 2)}s")
 
 
 if __name__ == "__main__":
diff --git a/gantry/routes/prediction.py b/gantry/routes/prediction.py
index e8d696a..706f2d6 100644
--- a/gantry/routes/prediction.py
+++ b/gantry/routes/prediction.py
@@ -9,6 +9,10 @@
 IDEAL_SAMPLE = 5
 DEFAULT_CPU_REQUEST = 1
 DEFAULT_MEM_REQUEST = 2 * 1_000_000_000  # 2GB in bytes
+DEFAULT_CPU_LIMIT = 5
+DEFAULT_MEM_LIMIT = 5 * 1_000_000_000
+MEM_LIMIT_BUMP = 1.2
+MEMORY_LIMIT_FLOOR = 500 * 1_000_000  # 350MB
 EXPENSIVE_VARIANTS = {
     "sycl",
     "mpi",
@@ -39,22 +43,25 @@ async def predict(db: aiosqlite.Connection, spec: dict) -> dict:
         predictions = {
             "cpu_request": DEFAULT_CPU_REQUEST,
             "mem_request": DEFAULT_MEM_REQUEST,
+            "cpu_limit": DEFAULT_CPU_LIMIT,
+            "mem_limit": DEFAULT_MEM_LIMIT,
+            "build_jobs": DEFAULT_CPU_LIMIT,
         }
     else:
         # mapping of sample: [0] cpu_mean, [1] cpu_max, [2] mem_mean, [3] mem_max
+        n = len(sample)
         predictions = {
-            # averages the respective metric in the sample
-            "cpu_request": sum([build[0] for build in sample]) / len(sample),
-            "mem_request": sum([build[2] for build in sample]) / len(sample),
+            "cpu_request": sum(build[0] for build in sample) / n,
+            "mem_request": sum(build[2] for build in sample) / n,
+            "cpu_limit": sum(build[1] for build in sample) / n,
+            "mem_limit": max(build[3] for build in sample) * MEM_LIMIT_BUMP,
         }
-
-    # warn if the prediction is below some thresholds
-    if predictions["cpu_request"] < 0.2:
-        logger.warning(f"Warning: CPU request for {spec} is below 0.2 cores")
-        predictions["cpu_request"] = DEFAULT_CPU_REQUEST
-    if predictions["mem_request"] < 10_000_000:
-        logger.warning(f"Warning: Memory request for {spec} is below 10MB")
-        predictions["mem_request"] = DEFAULT_MEM_REQUEST
+        # build jobs cannot be less than 1
+        predictions["build_jobs"] = max(1, round(predictions["cpu_limit"]))
+        # setup-env.sh requires more memory to run. the job collection doesn't capture
+        # utilization during this step (and won't be reflected in predictions
+        # so we need to manually set a floor for this limit
+        predictions["mem_limit"] = max(predictions["mem_limit"], MEMORY_LIMIT_FLOOR)
 
     # convert predictions to k8s friendly format
     for k, v in predictions.items():
@@ -65,11 +72,12 @@ async def predict(db: aiosqlite.Connection, spec: dict) -> dict:
 
     return {
         "variables": {
-            # spack uses these env vars to set the resource requests
-            # set them here at the last minute to avoid using these vars
-            # and clogging up the code
             "KUBERNETES_CPU_REQUEST": predictions["cpu_request"],
             "KUBERNETES_MEMORY_REQUEST": predictions["mem_request"],
+            "KUBERNETES_CPU_LIMIT": predictions["cpu_limit"],
+            "KUBERNETES_MEMORY_LIMIT": predictions["mem_limit"],
+            "SPACK_BUILD_JOBS": predictions["build_jobs"],
+            "CI_JOB_SIZE": "custom",
         },
     }
 
@@ -115,7 +123,7 @@ async def select_sample(query: str, filters: dict, extra_params: list = []) -> l
         # within this combo, variants included
         query = f"""
         SELECT cpu_mean, cpu_max, mem_mean, mem_max FROM jobs
-        WHERE ref='develop' AND {' AND '.join(f'{param}=?' for param in filters.keys())}
+        WHERE {' AND '.join(f'{param}=?' for param in filters.keys())}
         ORDER BY end DESC LIMIT {IDEAL_SAMPLE}
         """
 
@@ -155,7 +163,7 @@ async def select_sample(query: str, filters: dict, extra_params: list = []) -> l
 
         query = f"""
         SELECT cpu_mean, cpu_max, mem_mean, mem_max FROM jobs
-        WHERE ref='develop' AND {' AND '.join(f'{param}=?' for param in filters.keys())}
+        WHERE {' AND '.join(f'{param}=?' for param in filters.keys())}
         AND {' AND '.join(exp_variant_conditions)}
         ORDER BY end DESC LIMIT {IDEAL_SAMPLE}
         """
diff --git a/gantry/tests/defs/prediction.py b/gantry/tests/defs/prediction.py
index 7b697c4..e567b46 100644
--- a/gantry/tests/defs/prediction.py
+++ b/gantry/tests/defs/prediction.py
@@ -21,8 +21,12 @@
 # calculated by running the baseline prediction algorithm on the sample data in gantry/tests/sql/insert_prediction.sql
 NORMAL_PREDICTION = {
     "variables": {
+        "CI_JOB_SIZE": "custom",
+        "KUBERNETES_CPU_LIMIT": "12001m",
         "KUBERNETES_CPU_REQUEST": "11779m",
+        "KUBERNETES_MEMORY_LIMIT": "49424M",
         "KUBERNETES_MEMORY_REQUEST": "9577M",
+        "SPACK_BUILD_JOBS": 12,
     },
 }
 
@@ -30,7 +34,11 @@
 # that match what the client wants
 DEFAULT_PREDICTION = {
     "variables": {
+        "CI_JOB_SIZE": "custom",
+        "KUBERNETES_CPU_LIMIT": "5000m",
         "KUBERNETES_CPU_REQUEST": "1000m",
+        "KUBERNETES_MEMORY_LIMIT": "5000M",
         "KUBERNETES_MEMORY_REQUEST": "2000M",
+        "SPACK_BUILD_JOBS": 5,
     },
 }