Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor readers v #247

Merged
merged 22 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion alphabase/constants/const_files/psm_reader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,6 @@ library_reader_base:
reader_type: library_reader_base
rt_unit: irt
fixed_C57: False
csv_sep: "\t"
jalew188 marked this conversation as resolved.
Show resolved Hide resolved
mod_seq_columns:
- 'ModifiedPeptideSequence'
- 'ModifiedPeptide'
Expand Down
5 changes: 1 addition & 4 deletions alphabase/psm_reader/alphapept_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from alphabase.psm_reader.psm_reader import (
PSMReaderBase,
psm_reader_provider,
psm_reader_yaml,
)


Expand Down Expand Up @@ -53,6 +52,7 @@ class AlphaPeptReader(PSMReaderBase):
"""Reader for AlphaPept's *.ms_data.hdf files."""

_reader_type = "alphapept"
_modification_type = "alphapept"

def __init__(
self,
Expand All @@ -73,9 +73,6 @@ def __init__(
)
self.hdf_dataset = "identifications"

def _init_modification_mapping(self) -> None:
self.modification_mapping = psm_reader_yaml["alphapept"]["modification_mapping"]

def _load_file(self, filename: str) -> pd.DataFrame:
with h5py.File(filename, "r") as _hdf:
dataset = _hdf[self.hdf_dataset]
Expand Down
9 changes: 1 addition & 8 deletions alphabase/psm_reader/maxquant_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Reader for MaxQuant data."""

import copy
import warnings
from typing import List, Optional

Expand All @@ -12,7 +11,6 @@
from alphabase.psm_reader.psm_reader import (
PSMReaderBase,
psm_reader_provider,
psm_reader_yaml,
)

# make sure all warnings are shown
Expand Down Expand Up @@ -128,6 +126,7 @@ class MaxQuantReader(PSMReaderBase):

_reader_type = "maxquant"
_add_unimod_to_mod_mapping = True
_modification_type = "maxquant"

def __init__( # noqa: PLR0913 many arguments in function definition
self,
Expand Down Expand Up @@ -196,12 +195,6 @@ def __init__( # noqa: PLR0913 many arguments in function definition
self._mod_seq_columns = mod_seq_columns
self.mod_seq_column = "Modified sequence"

def _init_modification_mapping(self) -> None:
self.modification_mapping = copy.deepcopy(
# otherwise maxquant reader will modify the dict inplace
psm_reader_yaml["maxquant"]["modification_mapping"]
)

def _translate_decoy(self) -> None:
if PsmDfCols.DECOY in self._psm_df.columns:
self._psm_df[PsmDfCols.DECOY] = (
Expand Down
170 changes: 170 additions & 0 deletions alphabase/psm_reader/modification_mapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
"""Module to handle modification mappings for different search engines."""
jalew188 marked this conversation as resolved.
Show resolved Hide resolved

import copy
from collections import defaultdict
from typing import Dict, Optional

from alphabase.psm_reader.utils import MOD_TO_UNIMOD_DICT, get_extended_modifications


class ModificationMapper:
    """Class to handle modification mappings for different search engines.

    The mapper keeps two dictionaries:

    * ``modification_mapping``: AlphaBase modification name -> list of
      search-engine specific spellings.
    * ``rev_mod_mapping``: search-engine specific spelling -> AlphaBase
      modification name.

    The mapper always works on its own copy of the mapping taken from the
    reader yaml, so the shared configuration is never modified in place.
    """

    def __init__(
        self,
        custom_modification_mapping: Optional[Dict[str, str]],
        reader_yaml: Dict,
        modification_type: Optional[str],
        *,
        add_unimod_to_mod_mapping: bool,
    ):
        """Initialize the ModificationMapper.

        Parameters
        ----------
        custom_modification_mapping:
            A custom mapping that is merged on top of the default one.
            Anything that is not a dict (e.g. ``None``) is ignored.
            The key of dict is a modification name in AlphaBase format;
            the value could be a str or a list, see below
            ```
            add_modification_mapping({
            'Dimethyl@K': ['K(Dimethyl)'], # list
            'Dimethyl@Any_N-term': '_(Dimethyl)', # str
            })
            ```

        reader_yaml:
            the yaml (read from file) containing the modification mappings

        modification_type:
            the type of modification mapping ("maxquant" or "alphapept"),
            used as key into `reader_yaml`. If None, the default mapping is empty.

        add_unimod_to_mod_mapping:
            whether unimod modifications should be added to the mapping

        """
        self._psm_reader_yaml = reader_yaml
        self._add_unimod_to_mod_mapping = add_unimod_to_mod_mapping
        self._modification_type = modification_type

        self.modification_mapping = None
        self.rev_mod_mapping = None
        # First load the defaults, then layer the user-supplied mapping on top.
        self.set_modification_mapping()
        self.add_modification_mapping(custom_modification_mapping)

    def add_modification_mapping(self, custom_modification_mapping: dict) -> None:
        """Append additional modification mappings for the search engine.

        Also creates a reverse mapping from the modification format used by the search engine to the AlphaBase format.

        Parameters
        ----------
        custom_modification_mapping : dict
            The key of dict is a modification name in AlphaBase format;
            the value could be a str or a list, see below
            ```
            add_modification_mapping({
            'Dimethyl@K': ['K(Dimethyl)'], # list
            'Dimethyl@Any_N-term': '_(Dimethyl)', # str
            })
            ```
            Non-dict values (e.g. ``None``) are ignored.

        """
        if not isinstance(custom_modification_mapping, dict):
            return

        # Normalize every value to a list so that str and list inputs are treated alike.
        new_modification_mapping = defaultdict(list)
        for key, val in custom_modification_mapping.items():
            if isinstance(val, str):
                new_modification_mapping[key].append(val)
            else:
                new_modification_mapping[key].extend(val)

        if new_modification_mapping:
            # On key collisions the custom entry replaces the existing one (dict union).
            self.set_modification_mapping(
                self.modification_mapping | new_modification_mapping
            )

    def set_modification_mapping(
        self, modification_mapping: Optional[Dict] = None
    ) -> None:
        """Set the modification mapping for the search engine.

        Also creates a reverse mapping from the modification format used by the search engine to the AlphaBase format.

        Parameters
        ----------
        modification_mapping:
            If None: the mapping is (re-)initialized from the psm_reader_yaml using the modification type
            given at construction (or as an empty dict if that type is None).
            If dictionary: the current modification_mapping will be overwritten by a copy of this.
            If str: the parameter will be interpreted as a reader type, and the modification_mapping is read from the
            "modification_mapping" section of the psm_reader_yaml

        Raises
        ------
        ValueError
            If a str is passed that is not a key of the psm_reader_yaml.

        """
        if modification_mapping is None:
            self._init_modification_mapping()
        elif isinstance(
            modification_mapping, str
        ):  # TODO: remove this overloading of the parameter by introducing yaml key "modification_mapping_type"
            if modification_mapping not in self._psm_reader_yaml:
                raise ValueError(
                    f"Unknown modification mapping: {modification_mapping}"
                )
            # Copy, otherwise the steps below would modify the shared yaml dict in place.
            self.modification_mapping = copy.deepcopy(
                self._psm_reader_yaml[modification_mapping]["modification_mapping"]
            )
        else:
            self.modification_mapping = copy.deepcopy(modification_mapping)

        self._str_mods_to_lists()

        # NOTE(review): both steps are assumed to be guarded by the flag - confirm
        # against the original indentation.
        if self._add_unimod_to_mod_mapping:
            self._add_all_unimod()
            self._extend_mod_brackets()

        self.rev_mod_mapping = self._get_reversed_mod_mapping()

    def _init_modification_mapping(self) -> None:
        """Initialize the modification mapping from the psm_reader_yaml or as an empty dictionary."""
        if self._modification_type is None:
            self.modification_mapping = {}
            return

        # Copy, otherwise later in-place edits (str -> list conversion, added unimod
        # entries, bracket variants) would leak into the shared yaml dict.
        self.modification_mapping = copy.deepcopy(
            self._psm_reader_yaml[self._modification_type]["modification_mapping"]
        )

    def _add_all_unimod(self) -> None:
        """Add all unimod modifications to the modification mapping."""
        for mod_name, unimod in MOD_TO_UNIMOD_DICT.items():
            self.modification_mapping.setdefault(mod_name, []).append(unimod)

    def _extend_mod_brackets(self) -> None:
        """Update modification_mapping to include different bracket types."""
        for key, mod_list in list(self.modification_mapping.items()):
            self.modification_mapping[key] = get_extended_modifications(mod_list)

    def _str_mods_to_lists(self) -> None:
        """Convert all single strings to lists containing one item in self.modification_mapping."""
        for mod, val in list(self.modification_mapping.items()):
            if isinstance(val, str):
                self.modification_mapping[mod] = [val]

    def _get_reversed_mod_mapping(self) -> Dict[str, str]:
        """Create a reverse mapping from the modification format used by the search engine to the AlphaBase format."""
        rev_mod_mapping = {}
        for mod_alphabase_format, mod_other_format in self.modification_mapping.items():
            if not isinstance(mod_other_format, (list, tuple)):
                rev_mod_mapping[mod_other_format] = mod_alphabase_format
                continue

            is_protein_n_term = mod_alphabase_format.endswith("Protein_N-term")
            for mod_other_format_ in mod_other_format:
                # A "Protein_N-term" modification never overrides an entry that
                # is already registered for the same search-engine spelling.
                if is_protein_n_term and mod_other_format_ in rev_mod_mapping:
                    continue

                rev_mod_mapping[mod_other_format_] = mod_alphabase_format

        return rev_mod_mapping
Loading
Loading