Skip to content

Commit

Permalink
Merge branch 'development' into windowsfix
Browse files Browse the repository at this point in the history
  • Loading branch information
jalew188 committed Jun 28, 2024
2 parents 9922319 + ca05c60 commit 3b55654
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 20 deletions.
4 changes: 2 additions & 2 deletions alphabase/psm_reader/sage_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def lookup_modification(

matched_mod = filtered_mod_df.sort_values(by="unimod_id").iloc[0]

- return matched_mod["mod_name"]
+ return matched_mod["mod_name_stripped"]


def capture_modifications(
Expand Down Expand Up @@ -122,7 +122,7 @@ def capture_modifications(
mass_observed, previous_aa, mod_annotated_df, ppm_tolerance=ppm_tolerance
)
if mod is not None:
- site_list.append(str(match_start - 1 - match_delta))
+ site_list.append(str(match_start - match_delta))
mod_list.append(mod)

else:
Expand Down
60 changes: 42 additions & 18 deletions nbs_tests/psm_reader/sage_reader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -32,7 +32,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -42,7 +42,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -53,17 +53,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"assert capture_modifications('Q[-17.026548]DQSANEKNK[+42.010567]LEM[+15.9949]NK[+42.010567]', annotated_mod_df) == ('0;9;12;14', 'Gln->pyro-Glu@Q^Any N-term;Acetyl@K;Oxidation@M;Acetyl@K')"
"modifications = capture_modifications('Q[-17.026548]DQSANEKNK[+42.010567]LEM[+15.9949]NK[+42.010567]', annotated_mod_df)\n",
"assert (\n",
" modifications == ('1;10;13;15', 'Gln->pyro-Glu@Q^Any_N-term;Acetyl@K;Oxidation@M;Acetyl@K')\n",
"), modifications"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -73,7 +76,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -193,7 +196,7 @@
" <td>0.000106</td>\n",
" <td>False</td>\n",
" <td>7932</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Carbamidomethyl@C</td>\n",
" <td>15</td>\n",
" <td>0.770401</td>\n",
Expand Down Expand Up @@ -227,7 +230,7 @@
" <td>0.000106</td>\n",
" <td>False</td>\n",
" <td>8375</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Oxidation@M</td>\n",
" <td>15</td>\n",
" <td>0.804350</td>\n",
Expand Down Expand Up @@ -280,9 +283,9 @@
"1 sp|P25815|S100P_HUMAN 0.000106 False 4250 \n",
"2 sp|P06748|NPM_HUMAN 0.000106 False 9584 \n",
"3 sp|O95602|RPA1_HUMAN 0.000106 False 10841 \n",
"4 sp|Q9HCK8|CHD8_HUMAN 0.000106 False 7932 1 \n",
"4 sp|Q9HCK8|CHD8_HUMAN 0.000106 False 7932 2 \n",
"5 sp|Q86U86|PB1_HUMAN 0.000106 False 14771 \n",
"6 sp|Q86TC9|MYPN_HUMAN 0.000106 False 8375 1 \n",
"6 sp|Q86TC9|MYPN_HUMAN 0.000106 False 8375 2 \n",
"7 sp|O14974|MYPT1_HUMAN 0.000106 False 1864 \n",
"\n",
" mods nAA rt_norm precursor_mz \n",
Expand All @@ -296,7 +299,7 @@
"7 17 1.000000 821.387362 "
]
},
"execution_count": null,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -321,15 +324,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"assert np.all(psm_df['fdr'] <= 0.01)\n",
"assert psm_df['sequence'].iloc[4] == 'DCEDPEYKPLQGPPK'\n",
"assert psm_df['mod_sites'].iloc[4] == '1'\n",
"assert psm_df['mods'].iloc[4] == 'Carbamidomethyl@C'"
"assert (psm_df['mods'] != \"\").sum() == 2\n",
"assert (psm_df['mod_sites'] != \"\").sum() == 2\n",
"for seq, mods, mod_sites in psm_df[[\"sequence\",\"mods\",\"mod_sites\"]].values:\n",
" if mods == \"\":\n",
" assert mod_sites == \"\"\n",
" else:\n",
" mods = mods.split(\";\")\n",
" mod_sites = mod_sites.split(\";\")\n",
" for mod, site in zip(mods, mod_sites):\n",
" if site == \"0\":\n",
" continue\n",
" assert seq[int(site)-1] == mod[-1]"
]
}
],
Expand All @@ -338,6 +350,18 @@
"display_name": "python3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 3b55654

Please sign in to comment.