Skip to content

Update SpectrumDocument.py #93

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
12 changes: 6 additions & 6 deletions .github/workflows/CI_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ on:
jobs:

first_check:
name: first code check / python-3.8 / ubuntu-latest
name: first code check / python-3.9 / ubuntu-latest
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: 3.8
python-version: 3.9
- name: Python info
run: |
which python
Expand Down Expand Up @@ -53,10 +53,10 @@ jobs:
fail-fast: false
matrix:
os: ['ubuntu-latest', 'macos-latest', 'windows-latest']
python-version: ['3.7', '3.8', '3.9']
python-version: ['3.9']
exclude:
# already tested in first_check job
- python-version: 3.8
- python-version: 3.9
os: ubuntu-latest
steps:
- uses: actions/checkout@v2
Expand Down Expand Up @@ -108,7 +108,7 @@ jobs:
echo "The code is sufficiently documented with ${UNCOVERED_MEMBERS} uncovered members out of ${UNCOVERED_MEMBERS_ALLOWED} allowed.";

anaconda_build:
name: Anaconda build / python-3.7 / ubuntu-latest
name: Anaconda build / python-3.9 / ubuntu-latest
runs-on: ubuntu-latest
strategy:
fail-fast: false
Expand All @@ -123,7 +123,7 @@ jobs:
activate-environment: spec2vec-build
auto-update-conda: true
environment-file: conda/environment-build.yml
python-version: 3.8
python-version: 3.9
- name: Show conda config
shell: bash -l {0}
run: |
Expand Down
3 changes: 1 addition & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ dataset.
s = msfilters.normalize_intensities(s)
s = msfilters.reduce_to_number_of_peaks(s, n_required=10, ratio_desired=0.5, n_max=500)
s = msfilters.select_by_mz(s, mz_from=0, mz_to=1000)
s = msfilters.add_losses(s, loss_mz_from=10.0, loss_mz_to=200.0)
s = msfilters.require_minimum_number_of_peaks(s, n_required=10)
return s

Expand All @@ -150,7 +149,7 @@ dataset.
spectrums = [s for s in spectrums if s is not None]

# Create spectrum documents
reference_documents = [SpectrumDocument(s, n_decimals=2) for s in spectrums]
reference_documents = [SpectrumDocument(s, n_decimals=2, loss_mz_from=10.0, loss_mz_to=200.0) for s in spectrums]

model_file = "references.model"
model = train_new_word2vec_model(reference_documents, iterations=[10, 20, 30], filename=model_file,
Expand Down
5 changes: 2 additions & 3 deletions integration-tests/test_user_workflow_spec2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import gensim
import numpy as np
from matchms import calculate_scores
from matchms.filtering import (add_losses, add_parent_mass, default_filters,
from matchms.filtering import (add_parent_mass, default_filters,
normalize_intensities,
reduce_to_number_of_peaks,
require_minimum_number_of_peaks, select_by_mz)
Expand All @@ -26,7 +26,6 @@ def apply_my_filters(s):
s = normalize_intensities(s)
s = reduce_to_number_of_peaks(s, n_required=10, ratio_desired=0.5)
s = select_by_mz(s, mz_from=0, mz_to=1000)
s = add_losses(s, loss_mz_from=10.0, loss_mz_to=200.0)
s = require_minimum_number_of_peaks(s, n_required=5)
return s

Expand All @@ -40,7 +39,7 @@ def apply_my_filters(s):
spectrums = [s for s in spectrums if s is not None]

# convert spectrums to spectrum 'documents'
documents = [SpectrumDocument(s, n_decimals=1) for s in spectrums]
documents = [SpectrumDocument(s, n_decimals=1, loss_mz_from=10.0, loss_mz_to=200.0) for s in spectrums]

model_file = os.path.join(repository_root, "integration-tests", "test_user_workflow_spec2vec.model")
if os.path.isfile(model_file):
Expand Down
6 changes: 2 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,13 @@
"License :: OSI Approved :: Apache Software License",
"Natural Language :: English",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
],
test_suite="tests",
python_requires='>=3.7',
python_requires='>=3.9',
install_requires=[
"gensim >=4.2.0",
"matchms >=0.14.0",
"matchms >=0.27.0",
"numba >=0.51",
"numpy",
"scipy",
Expand Down
14 changes: 8 additions & 6 deletions spec2vec/SpectrumDocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class SpectrumDocument(Document):
[100. 150. 200.51]
substance1
"""
def __init__(self, spectrum, n_decimals: int = 2):
def __init__(self, spectrum, n_decimals: int = 2, loss_mz_from=10, loss_mz_to=200):
"""

Parameters
Expand All @@ -51,15 +51,17 @@ def __init__(self, spectrum, n_decimals: int = 2):
word "peak@200.39".
"""
self.n_decimals = n_decimals
self.loss_mz_from = loss_mz_from
self.loss_mz_to = loss_mz_to
self.weights = None
super().__init__(obj=spectrum)
self._add_weights()

def _make_words(self):
"""Create word from peaks (and losses)."""
peak_words = [f"peak@{mz:.{self.n_decimals}f}" for mz in self._obj.peaks.mz]
if self._obj.losses is not None:
loss_words = [f"loss@{mz:.{self.n_decimals}f}" for mz in self._obj.losses.mz]
if self.losses is not None:
loss_words = [f"loss@{mz:.{self.n_decimals}f}" for mz in self.losses.mz]
else:
loss_words = []
self.words = peak_words + loss_words
Expand All @@ -70,8 +72,8 @@ def _add_weights(self):
assert self._obj.peaks.intensities.max() <= 1, "peak intensities not normalized"

peak_intensities = self._obj.peaks.intensities.tolist()
if self._obj.losses is not None:
loss_intensities = self._obj.losses.intensities.tolist()
if self.losses is not None:
loss_intensities = self.losses.intensities.tolist()
else:
loss_intensities = []
self.weights = peak_intensities + loss_intensities
Expand All @@ -96,7 +98,7 @@ def metadata(self):
@property
def losses(self) -> Optional[Spikes]:
"""Return losses of original spectrum."""
return self._obj.losses
return self._obj.compute_losses(self.loss_mz_from, self.loss_mz_to)

@property
def peaks(self) -> Spikes:
Expand Down
18 changes: 7 additions & 11 deletions tests/test_spectrum_document.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np
import pytest
from matchms import Spectrum
from matchms.filtering import add_losses
from spec2vec import SpectrumDocument


Expand All @@ -11,7 +10,7 @@ def test_spectrum_document_init_n_decimals_default_value_no_losses():
intensities = np.array([0, 0.01, 0.1, 1], dtype="float")
metadata = dict(precursor_mz=100.0)
spectrum = Spectrum(mz=mz, intensities=intensities, metadata=metadata)
spectrum_document = SpectrumDocument(spectrum)
spectrum_document = SpectrumDocument(spectrum, loss_mz_from = 0.0, loss_mz_to = -1.0)

assert spectrum_document.n_decimals == 2, "Expected different default for n_decimals"
assert len(spectrum_document) == 4
Expand All @@ -26,7 +25,7 @@ def test_spectrum_document_init_n_decimals_1_no_losses():
intensities = np.array([0, 0.01, 0.1, 1], dtype="float")
metadata = dict(precursor_mz=100.0)
spectrum = Spectrum(mz=mz, intensities=intensities, metadata=metadata)
spectrum_document = SpectrumDocument(spectrum, n_decimals=1)
spectrum_document = SpectrumDocument(spectrum, n_decimals=1, loss_mz_from = 0.0, loss_mz_to = -1.0)

assert spectrum_document.n_decimals == 1
assert len(spectrum_document) == 4
Expand All @@ -42,8 +41,7 @@ def test_spectrum_document_init_default_with_losses():
intensities = np.array([0, 0.01, 0.1, 1], dtype="float")
metadata = dict(precursor_mz=100.0)
spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)
spectrum = add_losses(spectrum_in)
spectrum_document = SpectrumDocument(spectrum)
spectrum_document = SpectrumDocument(spectrum_in)

assert spectrum_document.n_decimals == 2, "Expected different default for n_decimals"
assert len(spectrum_document) == 8
Expand All @@ -60,8 +58,7 @@ def test_spectrum_document_init_n_decimals_1():
intensities = np.array([0, 0.01, 0.1, 1], dtype="float")
metadata = dict(precursor_mz=100.0)
spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)
spectrum = add_losses(spectrum_in)
spectrum_document = SpectrumDocument(spectrum, n_decimals=1)
spectrum_document = SpectrumDocument(spectrum_in, n_decimals=1)

assert spectrum_document.n_decimals == 1
assert len(spectrum_document) == 8
Expand All @@ -79,7 +76,7 @@ def test_spectrum_document_metadata_getter():
metadata = {"precursor_mz": 100.0,
"smiles": "testsmiles"}
spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)
spectrum_document = SpectrumDocument(spectrum_in, n_decimals=2)
spectrum_document = SpectrumDocument(spectrum_in, n_decimals=2, loss_mz_from=0.0, loss_mz_to=-1.0)

assert spectrum_document.n_decimals == 2
assert len(spectrum_document) == 4
Expand Down Expand Up @@ -112,7 +109,7 @@ def test_spectrum_document_peak_getter():
intensities = np.array([0, 0.01, 0.1, 1], dtype="float")
metadata = {"precursor_mz": 100.0}
spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)
spectrum_document = SpectrumDocument(spectrum_in, n_decimals=2)
spectrum_document = SpectrumDocument(spectrum_in, n_decimals=2, loss_mz_from=0.0, loss_mz_to=-1.0)

assert spectrum_document.words == [
"peak@10.00", "peak@20.00", "peak@30.00", "peak@40.00"
Expand All @@ -127,8 +124,7 @@ def test_spectrum_document_losses_getter():
intensities = np.array([0, 0.01, 0.1, 1], dtype="float")
metadata = {"precursor_mz": 100.0}
spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)
spectrum = add_losses(spectrum_in)
spectrum_document = SpectrumDocument(spectrum, n_decimals=2)
spectrum_document = SpectrumDocument(spectrum_in, n_decimals=2)
assert np.all(spectrum_document.losses.mz == np.array([60., 70., 80., 90.])), \
"Expected different losses"
assert np.all(spectrum_document.losses.intensities == intensities[::-1]), \
Expand Down
Loading