Adapt hyperparameter scaling #447

Merged · 6 commits · Jan 24, 2025
5 changes: 5 additions & 0 deletions alphadia/constants/default.yaml
@@ -233,7 +233,12 @@ fdr:
keep_decoys: false
channel_wise_fdr: false
inference_strategy: "heuristic"
# (Experimental)
# uses a two-step classifier consisting of a logistic regression and a neural network
enable_two_step_classifier: false
# (Experimental)
# Optimizes the batch size and learning rate of the neural network
enable_nn_hyperparameter_tuning: false

search_output:
peptide_level_lfq: false
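Both new flags default to `false`. A minimal sketch of how they might be switched on for a run, assuming the shipped `default.yaml` is loaded into a nested dict before the workflow reads it (the file path and override mechanism are illustrative; only the key names come from the diff above):

```python
import yaml  # assumes PyYAML is available

# Illustrative only: load the shipped defaults and flip the two
# experimental FDR flags introduced in this PR.
with open("alphadia/constants/default.yaml") as f:
    config = yaml.safe_load(f)

config["fdr"]["enable_two_step_classifier"] = True
config["fdr"]["enable_nn_hyperparameter_tuning"] = True
```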
4 changes: 2 additions & 2 deletions alphadia/fdrexperimental.py
@@ -908,7 +908,7 @@ def predict_proba(self, x: np.ndarray):
return self.network(torch.Tensor(x)).detach().numpy()


def get_scaled_training_params(df, base_lr=0.001, max_batch=1024, min_batch=64):
def get_scaled_training_params(df, base_lr=0.001, max_batch=4096, min_batch=128):
"""
Scale batch size and learning rate based on dataframe size using square root relationship.

@@ -921,7 +921,7 @@ def get_scaled_training_params(df, base_lr=0.001, max_batch=1024, min_batch=64):
max_batch : int, optional
Maximum batch size (1024 for >= 1M samples), defaults to 1024
min_batch : int, optional
Minimum batch size, defaults to 32
Minimum batch size, defaults to 128

Returns
-------
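For readers of this hunk: the docstring's square-root relationship means the batch size grows linearly with the number of rows up to `max_batch` at 1M samples (clamped below at `min_batch`), and the learning rate is scaled by the square root of the resulting batch-size ratio. The sketch below restates that behaviour with the new defaults; it is an approximation consistent with the updated tests, not a copy of the library implementation.

```python
import numpy as np
import pandas as pd

def scaled_training_params_sketch(df: pd.DataFrame,
                                  base_lr: float = 0.001,
                                  max_batch: int = 4096,
                                  min_batch: int = 128) -> tuple[int, float]:
    """Sketch of the square-root scaling described above (assumed behaviour)."""
    n_samples = len(df)
    # Batch size scales linearly towards max_batch, reached at >= 1M samples,
    # then is clamped to the [min_batch, max_batch] range.
    batch_size = int(np.clip(max_batch * n_samples / 1_000_000, min_batch, max_batch))
    # Learning rate follows the square root of the batch-size ratio.
    learning_rate = base_lr * np.sqrt(batch_size / max_batch)
    return batch_size, learning_rate
```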
20 changes: 16 additions & 4 deletions alphadia/workflow/peptidecentric.py
@@ -97,7 +97,9 @@


def get_classifier_base(
enable_two_step_classifier: bool = False, fdr_cutoff: float = 0.01
enable_two_step_classifier: bool = False,
enable_nn_hyperparameter_tuning: bool = False,
fdr_cutoff: float = 0.01,
):
"""Creates and returns a classifier base instance.

Expand All @@ -106,6 +108,11 @@ def get_classifier_base(
enable_two_step_classifier : bool, optional
If True, uses logistic regression + neural network.
If False (default), uses only neural network.

enable_nn_hyperparameter_tuning: bool, optional
If True, uses hyperparameter tuning for the neural network.
If False (default), uses default hyperparameters for the neural network.

fdr_cutoff : float, optional
The FDR cutoff threshold used by the second classifier when two-step
classification is enabled. Default is 0.01.
Expand All @@ -120,7 +127,7 @@ def get_classifier_base(
batch_size=5000,
learning_rate=0.001,
epochs=10,
experimental_hyperparameter_tuning=True,
experimental_hyperparameter_tuning=enable_nn_hyperparameter_tuning,
)

if enable_two_step_classifier:
@@ -168,8 +175,13 @@ def init_fdr_manager(self):
self.fdr_manager = manager.FDRManager(
feature_columns=feature_columns,
classifier_base=get_classifier_base(
self.config["fdr"]["enable_two_step_classifier"],
self.config["fdr"]["fdr"],
enable_two_step_classifier=self.config["fdr"][
"enable_two_step_classifier"
],
enable_nn_hyperparameter_tuning=self.config["fdr"][
"enable_nn_hyperparameter_tuning"
],
fdr_cutoff=self.config["fdr"]["fdr"],
),
)

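For orientation, a direct call to the extended factory with the new keyword arguments would look roughly as follows (values are illustrative; the function and parameter names come from the diff above):

```python
# Illustrative call; in the workflow these values are read from config["fdr"].
classifier = get_classifier_base(
    enable_two_step_classifier=False,        # default: single neural-network classifier
    enable_nn_hyperparameter_tuning=True,    # tune batch size and learning rate
    fdr_cutoff=0.01,
)
```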
18 changes: 16 additions & 2 deletions gui/workflows/PeptideCentric.v1.json
@@ -72,14 +72,14 @@
"parameters": [
{
"id": "transfer_step_enabled",
"name": "Add 'transfer learning' step",
"name": "Transfer Learning Step (Experimental)",
"value": false,
"description": "Whether to perform a 'transfer learning' step before the first search. All parameters set here will also be used for this step (except those required to switch on the specific behaviour of this step).",
"type": "boolean"
},
{
"id": "mbr_step_enabled",
"name": "Add 'second search' step",
"name": "MBR Search Step (Experimental)",
"value": false,
"description": "Whether to perform a 'second search' step after the first search. All parameters set here will also be used for this step (except those required to switch on the specific behaviour of this step).",
"type": "boolean"
@@ -402,6 +402,20 @@
"value": false,
"description": "If enabled, decoy PSMs will be retained in the output.",
"type": "boolean"
},
{
"id": "enable_two_step_classifier",
"name": "Two Step Classifier (Experimental)",
"value": false,
"description": "If enabled, a two step classifier consisting of a linear filter and a neural network will be used.",
"type": "boolean"
},
{
"id": "enable_nn_hyperparameter_tuning",
"name": "Hyperparameter Tuning (Experimental)",
"value": false,
"description": "If enabled, the hyperparameters of the neural network like the batch size and learning rate will be tuned.",
"type": "boolean"
}
]
},
3 changes: 2 additions & 1 deletion requirements/requirements_loose.txt
@@ -4,7 +4,8 @@ numba
argparse
alpharaw>=0.3.1 # test: tolerate_version
alphatims
alphabase>=1.4.0 # test: tolerate_version
# TODO remove once compatible with alphabase>=1.5.0
alphabase>=1.4.0,<1.5.0 # test: tolerate_version
peptdeep>=1.3.0 # test: tolerate_version
dask==2024.11.2 # test: tolerate_version
progressbar
12 changes: 6 additions & 6 deletions tests/unit_tests/test_fdrx_base.py
@@ -53,14 +53,14 @@ def test_target_decoy_fdr(mock_show):
"n_samples,expected_batch,expected_lr",
[
# Large dataset case (≥1M samples)
(1_000_000, 1024, 0.001),
(2_000_000, 1024, 0.001),
(1_000_000, 4096, 0.001),
(2_000_000, 4096, 0.001),
# Mid-size dataset cases
(500_000, 512, 0.001 * np.sqrt(512 / 1024)), # 50% of max
(250_000, 256, 0.001 * np.sqrt(256 / 1024)), # 25% of max
(500_000, 2048, 0.001 * np.sqrt(2048 / 4096)), # 50% of max
(250_000, 1024, 0.001 * np.sqrt(1024 / 4096)), # 25% of max
# Small dataset cases
(50_000, 64, 0.001 * np.sqrt(64 / 1024)), # Should hit min batch size
(1_000, 64, 0.001 * np.sqrt(64 / 1024)), # Should hit min batch size
(25_000, 128, 0.001 * np.sqrt(128 / 4096)), # Should hit min batch size
(1_000, 128, 0.001 * np.sqrt(128 / 4096)), # Should hit min batch size
],
)
def test_get_scaled_training_params(n_samples, expected_batch, expected_lr):
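As a quick hand check of the updated expectations: with `max_batch = 4096`, 250,000 samples scale to 4096 × 0.25 = 1024, and the learning rate becomes 0.001 × sqrt(1024 / 4096) = 0.0005. A standalone snippet verifying that single case from the formula alone, independent of the library code:

```python
import numpy as np

# Hand-check one mid-size case from the parametrization above.
expected_batch = 4096 * 250_000 // 1_000_000          # -> 1024
expected_lr = 0.001 * np.sqrt(expected_batch / 4096)  # -> 0.0005
assert expected_batch == 1024
assert np.isclose(expected_lr, 0.0005)
```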