MannLabs · mschwoer · Jan 9, 2025 · Nov 20, 2024 · Nov 20, 2024 · Nov 20, 2024
diff --git a/alphabase/psm_reader/alphapept_reader.py b/alphabase/psm_reader/alphapept_reader.py
@@ -1,7 +1,7 @@
 """Reader for AlphaPept's *.ms_data.hdf files."""
 
 from pathlib import Path
-from typing import Optional, Tuple
+from typing import Tuple
 
 import h5py
 import numba
@@ -54,29 +54,12 @@ class AlphaPeptReader(PSMReaderBase):
     _reader_type = "alphapept"
     _modification_type = "alphapept"
 
-    def __init__(
-        self,
-        *,
-        column_mapping: Optional[dict] = None,
-        modification_mapping: Optional[dict] = None,
-        fdr: float = 0.01,
-        keep_decoy: bool = False,
-        **kwargs,
-    ):
-        """Reading PSMs from alphapept's *.ms_data.hdf."""
-        super().__init__(
-            column_mapping=column_mapping,
-            modification_mapping=modification_mapping,
-            fdr=fdr,
-            keep_decoy=keep_decoy,
-            **kwargs,
-        )
-        self.hdf_dataset = "identifications"
-
     def _load_file(self, filename: str) -> pd.DataFrame:
         """Load an AlphaPept output file to a DataFrame."""
         with h5py.File(filename, "r") as _hdf:
-            dataset = _hdf[self.hdf_dataset]
+            dataset = _hdf[
+                "identifications"
+            ]  # TODO: "identifications" could be moved to yaml
             df = pd.DataFrame({col: dataset[col] for col in dataset})
 
         # TODO: make this more stable

diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py
@@ -1,7 +1,5 @@
 """Readers for Spectronaut's output library and reports, Swath data and DIANN data."""
 
-from typing import List, Optional
-
 import numpy as np
 import pandas as pd
 
@@ -19,32 +17,8 @@ class SpectronautReader(MaxQuantReader):
 
     _reader_type = "spectronaut"
     _add_unimod_to_mod_mapping = True
-
-    def __init__(  # noqa: PLR0913 many arguments in function definition
-        self,
-        *,
-        column_mapping: Optional[dict] = None,
-        modification_mapping: Optional[dict] = None,
-        fdr: float = 0.01,
-        keep_decoy: bool = False,
-        fixed_C57: bool = False,  # noqa: N803 TODO: make this  *,fixed_c57  (breaking)
-        mod_seq_columns: Optional[List[str]] = None,
-        rt_unit: str = "minute",
-        **kwargs,
-    ):
-        """Initialize SpectronautReader."""
-        super().__init__(
-            column_mapping=column_mapping,
-            modification_mapping=modification_mapping,
-            fdr=fdr,
-            keep_decoy=keep_decoy,
-            mod_seq_columns=mod_seq_columns,
-            fixed_C57=fixed_C57,
-            rt_unit=rt_unit,
-            **kwargs,
-        )
-
-        self._min_max_rt_norm = True
+    _min_max_rt_norm = True
+    _fixed_c57_default = False
 
     def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame:
         """Spectronaut-specific preprocessing of output data."""
@@ -65,58 +39,14 @@ class SwathReader(SpectronautReader):
     _reader_type = "spectronaut"  # no typo
     _add_unimod_to_mod_mapping = True
 
-    def __init__(  # noqa: PLR0913 many arguments in function definition
-        self,
-        *,
-        column_mapping: Optional[dict] = None,
-        modification_mapping: Optional[dict] = None,
-        fdr: float = 0.01,
-        keep_decoy: bool = False,
-        fixed_C57: bool = False,  # noqa: N803 TODO: make this  *,fixed_c57  (breaking)
-        mod_seq_columns: Optional[List[str]] = None,
-        **kwargs,
-    ):
-        """SWATH or OpenSWATH library, similar to `SpectronautReader`."""
-        super().__init__(
-            column_mapping=column_mapping,
-            modification_mapping=modification_mapping,
-            fdr=fdr,
-            keep_decoy=keep_decoy,
-            fixed_C57=fixed_C57,
-            mod_seq_columns=mod_seq_columns,
-            **kwargs,
-        )
-
 
 class DiannReader(MaxQuantReader):
     """Reader for DIANN data."""
 
     _reader_type = "diann"
     _add_unimod_to_mod_mapping = True
-
-    def __init__(  # noqa: PLR0913 many arguments in function definition
-        self,
-        *,
-        column_mapping: Optional[dict] = None,
-        modification_mapping: Optional[dict] = None,
-        fdr: float = 0.01,
-        keep_decoy: bool = False,
-        fixed_C57: bool = False,  # noqa: N803 TODO: make this  *,fixed_c57  (breaking)
-        rt_unit: str = "minute",
-        **kwargs,
-    ):
-        """Similar to `SpectronautReader` but different in column_mapping and modification_mapping."""
-        super().__init__(
-            column_mapping=column_mapping,
-            modification_mapping=modification_mapping,
-            fdr=fdr,
-            keep_decoy=keep_decoy,
-            fixed_C57=fixed_C57,
-            rt_unit=rt_unit,
-            **kwargs,
-        )
-
-        self._min_max_rt_norm = False
+    _min_max_rt_norm = False
+    _fixed_c57_default = False  # unchanged default (MaxQuantReader: True)
 
     def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame:
         """DIANN-specific preprocessing of output data.
@@ -142,36 +72,13 @@ class SpectronautReportReader(MaxQuantReader):
 
     _reader_type = "spectronaut_report"
     _add_unimod_to_mod_mapping = True
-
-    def __init__(  # noqa: PLR0913 many arguments in function definition
-        self,
-        *,
-        column_mapping: Optional[dict] = None,
-        modification_mapping: Optional[dict] = None,
-        fdr: float = 0.01,
-        keep_decoy: bool = False,
-        fixed_C57: bool = False,  # noqa: N803 TODO: make this  *,fixed_c57  (breaking)
-        rt_unit: str = "minute",
-        **kwargs,
-    ):
-        """Initialize SpectronautReportReader."""
-        super().__init__(
-            column_mapping=column_mapping,
-            modification_mapping=modification_mapping,
-            fdr=fdr,
-            keep_decoy=keep_decoy,
-            fixed_C57=fixed_C57,
-            rt_unit=rt_unit,
-            **kwargs,
-        )
-
-        self.precursor_column = "EG.PrecursorId"  # TODO: move to yaml
-        self._min_max_rt_norm = False
+    _min_max_rt_norm = False
+    _fixed_c57_default = False  # unchanged default (MaxQuantReader: True)
 
     def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame:
         """Spectronaut report-specific preprocessing of output data."""
         df[[self.mod_seq_column, PsmDfCols.CHARGE]] = df[
-            self.precursor_column
+            "EG.PrecursorId"  # TODO: move to yaml
         ].str.split(".", expand=True, n=2)
         df[PsmDfCols.CHARGE] = df[PsmDfCols.CHARGE].astype(np.int8)
         return df

diff --git a/alphabase/psm_reader/maxquant_reader.py b/alphabase/psm_reader/maxquant_reader.py
@@ -127,67 +127,46 @@ class MaxQuantReader(PSMReaderBase):
     _reader_type = "maxquant"
     _add_unimod_to_mod_mapping = True
     _modification_type = "maxquant"
+    _fixed_c57_default = True
 
-    def __init__(  # noqa: PLR0913 many arguments in function definition
+    def __init__(  # noqa: PLR0913, D417 # too many arguments in function definition, missing argument descriptions
         self,
         *,
         column_mapping: Optional[dict] = None,
         modification_mapping: Optional[dict] = None,
+        mod_seq_columns: Optional[List[str]] = None,
         fdr: float = 0.01,
         keep_decoy: bool = False,
-        fixed_C57: bool = True,  # noqa: N803 TODO: make this  *,fixed_c57  (breaking)
-        mod_seq_columns: Optional[List[str]] = None,
         rt_unit: str = "minute",
+        # MaxQuant reader-specific
+        fixed_C57: Optional[bool] = None,  # noqa: N803 TODO: make this  *,fixed_c57  (breaking)
         **kwargs,
     ):
         """Reader for MaxQuant msms.txt and evidence.txt.
 
+        See documentation of `PSMReaderBase` for more information.
+
         Parameters
         ----------
-        column_mapping : dict, optional
-            By default None. If None, use
-            `psm_reader_yaml['maxquant']['column_mapping']`
-            (alphabase.psm_reader.psm_reader_yaml).
-
-        modification_mapping : dict, optional
-            By default None. If None, use
-            `psm_reader_yaml['maxquant']['modification_mapping']`
-            (alphabase.psm_reader.psm_reader_yaml).
-
-        fdr : float, optional
-            Load PSMs with FDR < this fdr, by default 0.01
-
-        keep_decoy : bool, optional
-            If keep decoy PSMs, by default False
-
         fixed_C57 : bool, optional
             If true, the search engine will not show `Carbamidomethyl`
             in the modified sequences.
-            by default True
+            If None (default), the class-level `_fixed_c57_default` is used.
 
-        mod_seq_columns : list, optional
-            The columns to find modified sequences,
-            by default ['Modified sequence']
-
-        rt_unit : str, optional
-            The unit of RT in the search engine result.
-            Defaults to 'minute'.
-
-        **kwargs : dict
-            deprecated
+        See documentation of `PSMReaderBase` for the rest of parameters.
 
         """
         super().__init__(
             column_mapping=column_mapping,
             modification_mapping=modification_mapping,
+            mod_seq_columns=mod_seq_columns,
             fdr=fdr,
             keep_decoy=keep_decoy,
             rt_unit=rt_unit,
-            mod_seq_columns=mod_seq_columns,
             **kwargs,
         )
 
-        self.fixed_C57 = fixed_C57
+        self.fixed_C57 = fixed_C57 if fixed_C57 is not None else self._fixed_c57_default
 
     def _translate_decoy(self) -> None:
         if PsmDfCols.DECOY in self._psm_df.columns:

diff --git a/alphabase/psm_reader/msfragger_reader.py b/alphabase/psm_reader/msfragger_reader.py
@@ -98,18 +98,34 @@ class MSFraggerPepXML(PSMReaderBase):
 
     _reader_type = "msfragger_pepxml"
 
-    def __init__(  # noqa: PLR0913 many arguments in function definition
+    def __init__(  # noqa: PLR0913, D417 # too many arguments in function definition, missing argument descriptions
         self,
         *,
         column_mapping: Optional[dict] = None,
         modification_mapping: Optional[dict] = None,
+        # mod_seq_columns: Optional[List[str]] = None,# TODO: not needed here?
         fdr: float = 0.001,  # refers to E-value in the PepXML
         keep_decoy: bool = False,
         rt_unit: str = "second",
+        # MSFragger reader-specific:
         keep_unknown_aa_mass_diffs: bool = False,
         **kwargs,
     ):
-        """MSFragger is not fully supported as we can only access the pepxml file."""
+        """Initialize the MSFragger reader.
+
+        See documentation of `PSMReaderBase` for more information.
+
+        MSFragger is not fully supported as we can only access the pepxml file.
+
+        Parameters
+        ----------
+        keep_unknown_aa_mass_diffs : bool, optional
+            Whether to keep PSMs with unknown amino acid mass differences, by default False
+
+
+        See documentation of `PSMReaderBase` for the rest of parameters.
+
+        """
         super().__init__(
             column_mapping=column_mapping,
             modification_mapping=modification_mapping,

diff --git a/alphabase/psm_reader/pfind_reader.py b/alphabase/psm_reader/pfind_reader.py
@@ -98,24 +98,6 @@ class pFindReader(PSMReaderBase):  # noqa: N801 name `pFindReader` should use Ca
 
     _reader_type = "pfind"
 
-    def __init__(
-        self,
-        *,
-        column_mapping: Optional[dict] = None,
-        modification_mapping: Optional[dict] = None,
-        fdr: float = 0.01,
-        keep_decoy: bool = False,
-        **kwargs,
-    ):
-        """Reading PSMs from pFind's *.txt."""
-        super().__init__(
-            column_mapping=column_mapping,
-            modification_mapping=modification_mapping,
-            fdr=fdr,
-            keep_decoy=keep_decoy,
-            **kwargs,
-        )
-
     def _translate_modifications(self) -> None:
         pass
 

diff --git a/alphabase/psm_reader/psm_reader.py b/alphabase/psm_reader/psm_reader.py
@@ -35,6 +35,8 @@ class PSMReaderBase(ABC):
-    # the typ of modification mapping to be used
+    # the type of modification mapping to be used
     _modification_type: Optional[str] = None
 
+    _min_max_rt_norm = False
+
     def __init__(  # noqa: PLR0913 # too many arguments
         self,
         *,
@@ -142,7 +144,6 @@ def __init__(  # noqa: PLR0913 # too many arguments
         self._psm_df = pd.DataFrame()
         self._keep_fdr = fdr
         self._keep_decoy = keep_decoy
-        self._min_max_rt_norm = False
         self._engine_rt_unit = rt_unit
         self._min_irt_value = -100
         self._max_irt_value = 200