Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add alphadia reader #254

Merged
merged 7 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions alphabase/constants/const_files/psm_reader.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,26 @@
# Reader configuration for AlphaDIA output (reader_type "alphadia").
alphadia:
reader_type: alphadia
rt_unit: minute
fixed_C57: False
# NOTE(review): keys appear to be the AlphaBase PSM column names and values the
# column names found in the AlphaDIA output -- confirm against the reader base class.
column_mapping:
'raw_name': 'run'
'sequence': 'sequence'
'charge': 'charge'
'rt': 'rt_observed'
'rt_start': 'rt_start'
'rt_stop': 'rt_stop'
'ccs': 'ccs'
'mobility': 'mobility'
'proteins': 'proteins'
'uniprot_ids': 'uniprot_ids'
'genes': 'genes'
# TODO: 'scan_num' is not mapped yet; the source column is still undecided ('scan_num': '' ?)
'score': 'score'
'fdr': 'fdr'
'mods': 'mods'
'intensity': 'intensity'
modification_mapping_type: 'maxquant' # TODO: should this be None instead of 'maxquant'?

alphapept:
reader_type: alphapept
rt_unit: minute
Expand Down
10 changes: 10 additions & 0 deletions alphabase/psm_reader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,17 @@
"MSFraggerPepXMLReader",
"SageReaderTSV",
"SageReaderParquet",
"AlphaDiaReaderTsv",
"AlphaDiaReaderParquet",
]

from alphabase.psm_reader.alphadia_reader import (
AlphaDiaReaderParquet,
AlphaDiaReaderTsv,
)
from alphabase.psm_reader.alphadia_reader import (
register_readers as register_alphadia_readers,
)
from alphabase.psm_reader.alphapept_reader import (
AlphaPeptReader,
)
Expand Down Expand Up @@ -70,3 +79,4 @@
register_mq_readers()
register_pf_readers()
register_sage_readers()
register_alphadia_readers()
34 changes: 34 additions & 0 deletions alphabase/psm_reader/alphadia_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Reader for AlphaDia data."""

from abc import ABC

import pandas as pd

from alphabase.psm_reader.psm_reader import PSMReaderBase, psm_reader_provider


class AlphaDiaReader(PSMReaderBase, ABC):
    """Shared base class for the AlphaDIA PSM readers.

    Sets the reader type to ``"alphadia"`` and overrides the
    ``_translate_modifications`` hook with a no-op.
    """

    # NOTE(review): presumably selects the matching entry in psm_reader.yaml -- confirm.
    _reader_type = "alphadia"

    def _translate_modifications(self) -> None:
        """Do nothing: there is nothing to translate for AlphaDIA."""


class AlphaDiaReaderTsv(AlphaDiaReader):
    """Reader for AlphaDIA results stored as TSV files.

    Adds nothing of its own; all behavior is inherited from ``AlphaDiaReader``.
    """


class AlphaDiaReaderParquet(AlphaDiaReader):
    """Reader for AlphaDIA results stored as parquet files."""

    def _load_file(self, filename: str) -> pd.DataFrame:
        """Load the parquet file at ``filename`` and return it as a DataFrame."""
        return pd.read_parquet(path=filename)


def register_readers() -> None:
    """Register the AlphaDIA TSV and parquet readers with ``psm_reader_provider``."""
    # Registration order is kept: the TSV reader first, then the parquet reader.
    readers = (
        ("alphadia", AlphaDiaReaderTsv),
        ("alphadia_parquet", AlphaDiaReaderParquet),
    )
    for reader_type, reader_class in readers:
        psm_reader_provider.register_reader(reader_type, reader_class)
2 changes: 1 addition & 1 deletion alphabase/psm_reader/msfragger_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame:
"""MsFragger-specific preprocessing of output data."""
df.fillna("", inplace=True)
if "ion_mobility" in df.columns:
df["ion_mobility"] = df.ion_mobility.astype(float)
df["ion_mobility"] = df["ion_mobility"].astype(float)
df[PsmDfCols.RAW_NAME] = df["spectrum"].str.split(".").apply(lambda x: x[0])
df["to_remove"] = 0 # TODO: revisit
self.column_mapping[PsmDfCols.TO_REMOVE] = "to_remove"
Expand Down
3 changes: 2 additions & 1 deletion alphabase/psm_reader/psm_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,12 +300,13 @@ def _load_modifications(self, origin_df: pd.DataFrame) -> None: # noqa: B027 em

def _translate_modifications(self) -> None:
"""Translate modifications to AlphaBase format."""
self._psm_df[PsmDfCols.MODS], unknown_mods = zip(
mods, unknown_mods = zip(
*self._psm_df[PsmDfCols.MODS].apply(
translate_modifications,
mod_dict=self._modification_mapper.rev_mod_mapping,
)
)
self._psm_df[PsmDfCols.MODS] = mods

# accumulate unknown mods
unknown_mod_set = set()
Expand Down
Binary file not shown.
Binary file not shown.
48 changes: 47 additions & 1 deletion tests/integration/test_psm_readers.py

Large diffs are not rendered by default.

Loading