Skip to content

Commit

Permalink
Merge pull request #285 from MannLabs/infer-charged-frag-types
Browse files Browse the repository at this point in the history
fix #279
  • Loading branch information
GeorgWa authored Jan 21, 2025
2 parents 57e58ab + 1c7633d commit 8a1298c
Show file tree
Hide file tree
Showing 4 changed files with 220 additions and 94 deletions.
42 changes: 36 additions & 6 deletions alphabase/spectral_library/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,7 @@ def load_hdf(
hdf_file: str,
load_mod_seq: bool = True,
support_legacy_mods_format: bool = True,
infer_charged_frag_types: bool = True,
):
"""Load the hdf library from hdf_file
Expand All @@ -702,6 +703,10 @@ def load_hdf(
Defaults to True.
DeprecationWarning: future versions will have a different default and eventually this flag will be dropped.
infer_charged_frag_types : bool, optional
if True, infer the charged fragment types as defined in the hdf file, defaults to True.
This is the default as users most likely don't know the charged fragment types in the hdf file.
If set to False, only charged frag types defined in `charged_frag_types` will be loaded.
"""
_hdf = HDF_File(hdf_file)
self._precursor_df: pd.DataFrame = _hdf.library.precursor_df.values
Expand All @@ -719,17 +724,18 @@ def load_hdf(
self._precursor_df[cols] = mod_seq_df[cols]

_fragment_mz_df = _hdf.library.fragment_mz_df.values
self._fragment_mz_df = _fragment_mz_df[
sort_charged_frag_types(
filter_valid_charged_frag_types(_fragment_mz_df.columns.values)
if infer_charged_frag_types:
self.charged_frag_types = sort_charged_frag_types(
filter_valid_charged_frag_types(_fragment_mz_df.columns)
)

self._fragment_mz_df = _fragment_mz_df[
get_available_columns(_fragment_mz_df, self.charged_frag_types)
]

_fragment_intensity_df = _hdf.library.fragment_intensity_df.values
self._fragment_intensity_df = _fragment_intensity_df[
sort_charged_frag_types(
filter_valid_charged_frag_types(_fragment_intensity_df.columns.values)
)
get_available_columns(_fragment_intensity_df, self.charged_frag_types)
]

@staticmethod
Expand Down Expand Up @@ -822,3 +828,27 @@ def annotate_fragments_from_speclib(
speclib._fragment_intensity_df = fragment_speclib._fragment_intensity_df.copy()

return speclib


def get_available_columns(df, columns):
    """Return the requested column names that are present in the dataframe.

    The order (and any repetition) of `columns` is preserved; names that
    are not found in `df.columns` are left out.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe whose columns are checked.

    columns : list
        Candidate column names to look up.

    Returns
    -------
    list
        The entries of `columns` that exist in `df.columns`.

    Examples
    --------
    >>> df = pd.DataFrame({'a': [1], 'b': [2]})
    >>> get_available_columns(df, ['a', 'b', 'c'])
    ['a', 'b']
    """
    available = []
    for name in columns:
        # Skip requested names the dataframe does not provide.
        if name not in df.columns:
            continue
        available.append(name)
    return available
31 changes: 21 additions & 10 deletions alphabase/spectral_library/flat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
remove_unused_fragments,
sort_charged_frag_types,
)
from alphabase.spectral_library.base import SpecLibBase
from alphabase.spectral_library.base import SpecLibBase, get_available_columns


class SpecLibFlat(SpecLibBase):
Expand Down Expand Up @@ -182,7 +182,12 @@ def save_hdf(self, hdf_file: str):
_hdf.library.fragment_mz_df = self.fragment_mz_df
_hdf.library.fragment_intensity_df = self.fragment_intensity_df

def load_hdf(self, hdf_file: str, load_mod_seq: bool = False):
def load_hdf(
self,
hdf_file: str,
load_mod_seq: bool = False,
infer_charged_frag_types: bool = True,
):
"""Load the hdf library from hdf_file
Parameters
Expand All @@ -194,6 +199,11 @@ def load_hdf(self, hdf_file: str, load_mod_seq: bool = False):
if also load mod_seq_df.
Defaults to False.
infer_charged_frag_types : bool, optional
if True, infer the charged fragment types as defined in the hdf file, defaults to True.
This is the default as users most likely don't know the charged fragment types in the hdf file.
If set to False, only charged frag types defined in `SpecLibBase.charged_frag_types` will be loaded.
"""
super().load_hdf(hdf_file, load_mod_seq=load_mod_seq)
_hdf = HDF_File(
Expand All @@ -202,18 +212,19 @@ def load_hdf(self, hdf_file: str, load_mod_seq: bool = False):
self._fragment_df = _hdf.library.fragment_df.values
self._protein_df = _hdf.library.protein_df.values

_fragment_mz_df = _hdf.library.fragment_mz_df.values
self._fragment_mz_df = _fragment_mz_df[
sort_charged_frag_types(
filter_valid_charged_frag_types(_fragment_mz_df.columns.values)
if infer_charged_frag_types:
self.charged_frag_types = sort_charged_frag_types(
filter_valid_charged_frag_types(_hdf.library.fragment_mz_df.columns)
)
]

_fragment_intensity_df = _hdf.library.fragment_intensity_df.values
self._fragment_intensity_df = _fragment_intensity_df[
sort_charged_frag_types(
filter_valid_charged_frag_types(_fragment_intensity_df.columns.values)
)
get_available_columns(_fragment_intensity_df, self.charged_frag_types)
]

_fragment_mz_df = _hdf.library.fragment_mz_df.values
self._fragment_mz_df = _fragment_mz_df[
get_available_columns(_fragment_mz_df, self.charged_frag_types)
]

def get_full_charged_types(self, frag_df: pd.DataFrame) -> list:
Expand Down
15 changes: 5 additions & 10 deletions nbs_tests/spectral_library/flat_library.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -196,13 +196,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Check if the fragment intensity dataframes are equal\n",
"# since for now the reader uses a predefined charge_types and the \"to_specLibBase\" method infers the charged_types \n",
"# We will ignore columns that are full zeros in the target object\n",
"\n",
"non_zero_target_columns = target.fragment_intensity_df.columns[target.fragment_intensity_df.sum() != 0]\n",
"\n",
"pd.testing.assert_frame_equal(target.fragment_intensity_df[non_zero_target_columns].sort_index(axis=1), back_to_base.fragment_intensity_df.sort_index(axis=1), check_dtype=False)"
"pd.testing.assert_frame_equal(target.fragment_intensity_df.sort_index(axis=1), back_to_base.fragment_intensity_df.sort_index(axis=1), check_dtype=False)"
]
},
{
Expand All @@ -213,7 +208,7 @@
"source": [
"# Check the non zero values in back_to_base.fragment_mz_df are the same as in target.fragment_mz_df\n",
"non_zero_indices = back_to_base.fragment_mz_df > 0\n",
"pd.testing.assert_frame_equal(target.fragment_mz_df[non_zero_target_columns][non_zero_indices].sort_index(axis=1), back_to_base.fragment_mz_df[non_zero_indices].sort_index(axis=1), check_dtype=False)"
"pd.testing.assert_frame_equal(target.fragment_mz_df[non_zero_indices].sort_index(axis=1), back_to_base.fragment_mz_df[non_zero_indices].sort_index(axis=1), check_dtype=False)"
]
},
{
Expand All @@ -224,12 +219,12 @@
"source": [
"#Calculate the full fragment_mz for the back_to_base and compare to the original\n",
"back_to_base.calc_fragment_mz_df()\n",
"pd.testing.assert_frame_equal(target.fragment_mz_df[non_zero_target_columns].sort_index(axis=1), back_to_base.fragment_mz_df.sort_index(axis=1), check_dtype=False)"
"pd.testing.assert_frame_equal(target.fragment_mz_df.sort_index(axis=1), back_to_base.fragment_mz_df.sort_index(axis=1), check_dtype=False)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -284,7 +279,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
"version": "3.11.7"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 8a1298c

Please sign in to comment.