Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor multistep search #402

Open
wants to merge 9 commits into
base: outline_for_multistep_search
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 1 addition & 16 deletions alphadia/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,29 +346,14 @@ def run(*args, **kwargs):

print("No output directory specified.")
return
reporting.init_logging(output_directory)

quant_dir = parse_quant_dir(args, config)

reporting.init_logging(output_directory)
raw_path_list = parse_raw_path_list(args, config)

library_path = parse_library(args, config)
fasta_path_list = parse_fasta(args, config)

logger.progress(f"Searching {len(raw_path_list)} files:") # TODO move
for f in raw_path_list:
logger.progress(f" {os.path.basename(f)}")

logger.progress(f"Using library: {library_path}")

logger.progress(f"Using {len(fasta_path_list)} fasta files:")
for f in fasta_path_list:
logger.progress(f" {f}")

# TODO rename all output_directory, output_folder => output_path, quant_dir->quant_path (except cli parameter)
logger.progress(f"Saving output to: {output_directory}")
if quant_dir is not None:
logger.progress(f"Saving quantification output to {quant_dir=}")

# important to suppress matplotlib output
matplotlib.use("Agg")
Expand Down
24 changes: 24 additions & 0 deletions alphadia/constants/keys.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
class ConstantsClass(type):
    """A metaclass for classes that should only contain string constants.

    Any attempt to assign an attribute on a class using this metaclass
    raises ``TypeError``, making the constants effectively read-only.
    """

    def __setattr__(self, name, value):
        # Class-level assignment is forbidden: constants are immutable.
        raise TypeError("Constants class cannot be modified")

    def get_values(cls):
        """Get all user-defined string values of the class."""
        values = []
        for attr_name, attr_value in cls.__dict__.items():
            # Skip dunder entries and anything that is not a string constant.
            if attr_name.startswith("__"):
                continue
            if isinstance(attr_value, str):
                values.append(attr_value)
        return values


class OutputKeys(metaclass=ConstantsClass):
    """String constants for reading and writing output columns."""

    # optimization
    OPTIMIZATION_PREFIX = "optimization."
    MS1_ERROR = "ms1_error"
    MS2_ERROR = "ms2_error"
    RT_ERROR = "rt_error"
    MOBILITY_ERROR = "mobility_error"
33 changes: 18 additions & 15 deletions alphadia/outputtransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from alphadia import fdr, grouping, libtransform, utils
from alphadia.consensus.utils import read_df, write_df
from alphadia.constants.keys import OutputKeys
from alphadia.exceptions import NoPsmFoundError
from alphadia.outputaccumulator import (
AccumulationBroadcaster,
Expand All @@ -29,12 +30,6 @@
from alphadia.workflow.config import Config
from alphadia.workflow.managers.raw_file_manager import RawFileManager

# TODO move to a class with the rest of the constants
MS1_ERROR = "ms1_error"
MS2_ERROR = "ms2_error"

OPTIMIZATION_PREFIX = "optimization."

logger = logging.getLogger()


Expand Down Expand Up @@ -403,9 +398,10 @@ def build_transfer_model(self, save=True):
transfer_lib_path = os.path.join(
self.output_folder, f"{self.TRANSFER_OUTPUT}.hdf"
)
assert os.path.exists(
transfer_lib_path
), f"Transfer library not found at {transfer_lib_path}, did you enable library generation?"
if not os.path.exists(transfer_lib_path):
raise ValueError(
f"Transfer library not found at {transfer_lib_path}, did you enable library generation?"
)

transfer_lib = SpecLibBase()
transfer_lib.load_hdf(
Expand Down Expand Up @@ -981,15 +977,22 @@ def _build_run_stat_df(
optimization_manager = manager.OptimizationManager(
path=optimization_manager_path
)
optimization_stats[MS2_ERROR] = optimization_manager.ms2_error
optimization_stats[MS1_ERROR] = optimization_manager.ms1_error
optimization_stats["rt_error"] = optimization_manager.rt_error
optimization_stats["mobility_error"] = optimization_manager.mobility_error
optimization_stats[OutputKeys.MS2_ERROR] = optimization_manager.ms2_error
mschwoer marked this conversation as resolved.
Show resolved Hide resolved
optimization_stats[OutputKeys.MS1_ERROR] = optimization_manager.ms1_error
optimization_stats[OutputKeys.RT_ERROR] = optimization_manager.rt_error
optimization_stats[OutputKeys.MOBILITY_ERROR] = (
optimization_manager.mobility_error
)
else:
logger.warning(f"Error reading optimization manager for {raw_name}")

for key in [MS2_ERROR, MS1_ERROR, "rt_error", "mobility_error"]:
stats[f"{OPTIMIZATION_PREFIX}{key}"] = optimization_stats[key]
for key in [
OutputKeys.MS2_ERROR,
OutputKeys.MS1_ERROR,
OutputKeys.RT_ERROR,
OutputKeys.MOBILITY_ERROR,
]:
stats[f"{OutputKeys.OPTIMIZATION_PREFIX}{key}"] = optimization_stats[key]

# collect calibration stats
calibration_stats = defaultdict(lambda: np.nan)
Expand Down
103 changes: 34 additions & 69 deletions alphadia/planning.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,13 @@
# native imports
import logging
import os
import socket
from collections.abc import Generator
from datetime import datetime
from importlib import metadata
from pathlib import Path

import alphabase
import alpharaw
import alphatims
import directlfq
import peptdeep

# third party imports
import torch
from alphabase.constants import modification
from alphabase.spectral_library.base import SpecLibBase

# alpha family imports
from alphabase.spectral_library.flat import SpecLibFlat

import alphadia

# alphadia imports
from alphadia import libtransform, outputtransform
from alphadia.exceptions import CustomError
from alphadia.workflow import peptidecentric, reporting
Expand Down Expand Up @@ -109,16 +93,7 @@ def __init__(

torch.set_num_threads(self._config["general"]["thread_count"])

@staticmethod
def print_logo() -> None: # TODO move elsewhere
"""Print the alphadia logo and version."""
logger.progress(" _ _ ___ ___ _ ")
logger.progress(r" __ _| |_ __| |_ __ _| \_ _| /_\ ")
logger.progress(" / _` | | '_ \\ ' \\/ _` | |) | | / _ \\ ")
logger.progress(" \\__,_|_| .__/_||_\\__,_|___/___/_/ \\_\\")
logger.progress(" |_| ")
logger.progress("")
logger.progress(f"version: {alphadia.__version__}")
self._log_inputs()

def _init_config(
self,
Expand Down Expand Up @@ -183,30 +158,6 @@ def spectral_library(self) -> SpecLibFlat:
def spectral_library(self, spectral_library: SpecLibFlat) -> None:
self._spectral_library = spectral_library

@staticmethod
def print_environment() -> None: # TODO move elsewhere
"""Log information about the python environment."""

logger.progress(f"hostname: {socket.gethostname()}")
now = datetime.today().strftime("%Y-%m-%d %H:%M:%S")
logger.progress(f"date: {now}")

logger.progress("================ AlphaX Environment ===============")
logger.progress(f"{'alphatims':<15} : {alphatims.__version__:}")
logger.progress(f"{'alpharaw':<15} : {alpharaw.__version__}")
logger.progress(f"{'alphabase':<15} : {alphabase.__version__}")
logger.progress(f"{'alphapeptdeep':<15} : {peptdeep.__version__}")
logger.progress(f"{'directlfq':<15} : {directlfq.__version__}")
logger.progress("===================================================")

logger.progress("================= Pip Environment =================")
pip_env = [
f"{dist.metadata['Name']}=={dist.version}"
for dist in metadata.distributions()
]
logger.progress(" ".join(pip_env))
logger.progress("===================================================")

def init_alphabase(self):
"""Init alphabase by registering custom modifications."""

Expand Down Expand Up @@ -234,27 +185,27 @@ def _parse_modifications(mod_str: str) -> list[str]:

prediction_config = self.config["library_prediction"]

fasta_digest = libtransform.FastaDigest(
enzyme=prediction_config["enzyme"],
fixed_modifications=_parse_modifications(
prediction_config["fixed_modifications"]
),
variable_modifications=_parse_modifications(
prediction_config["variable_modifications"]
),
max_var_mod_num=prediction_config["max_var_mod_num"],
missed_cleavages=prediction_config["missed_cleavages"],
precursor_len=prediction_config["precursor_len"],
precursor_charge=prediction_config["precursor_charge"],
precursor_mz=prediction_config["precursor_mz"],
)

if self.library_path is None and prediction_config["predict"]:
logger.progress("No library provided. Building library from fasta files.")
spectral_library = fasta_digest(self.fasta_path_list)
elif self.library_path is None and not prediction_config["predict"]:
if self.library_path is None and not prediction_config["predict"]:
logger.error("No library provided and prediction disabled.")
return
elif self.library_path is None and prediction_config["predict"]:
logger.progress("No library provided. Building library from fasta files.")

fasta_digest = libtransform.FastaDigest(
enzyme=prediction_config["enzyme"],
fixed_modifications=_parse_modifications(
prediction_config["fixed_modifications"]
),
variable_modifications=_parse_modifications(
prediction_config["variable_modifications"]
),
max_var_mod_num=prediction_config["max_var_mod_num"],
missed_cleavages=prediction_config["missed_cleavages"],
precursor_len=prediction_config["precursor_len"],
precursor_charge=prediction_config["precursor_charge"],
precursor_mz=prediction_config["precursor_mz"],
)
spectral_library = fasta_digest(self.fasta_path_list)
else:
spectral_library = dynamic_loader(self.library_path)

Expand Down Expand Up @@ -434,6 +385,20 @@ def _clean(self):
except Exception as e:
logger.exception(f"Error deleting library: {e}")

def _log_inputs(self):
    """Log all relevant inputs."""
    logger.info(f"Searching {len(self.raw_path_list)} files:")
    for raw_file in self.raw_path_list:
        logger.info(f" {os.path.basename(raw_file)}")

    logger.info(f"Using {len(self.fasta_path_list)} fasta files:")
    for fasta_file in self.fasta_path_list:
        logger.info(f" {fasta_file}")

    logger.info(f"Using library: {self.library_path}")
    logger.info(f"Saving output to: {self.output_folder}")


def _log_exception_event(
e: Exception, raw_name: str | None = None, workflow: WorkflowBase | None = None
Expand Down
17 changes: 10 additions & 7 deletions alphadia/search_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,15 @@
import pandas as pd
import yaml

from alphadia.constants.keys import OutputKeys
from alphadia.outputtransform import (
MS1_ERROR,
MS2_ERROR,
OPTIMIZATION_PREFIX,
SearchPlanOutput,
)
from alphadia.planning import (
Plan,
logger,
)
from alphadia.utilities.logging import print_environment, print_logo
from alphadia.workflow import reporting

# TODO the names of the steps need to be adjusted
Expand Down Expand Up @@ -124,8 +123,8 @@ def run_plan(self):
Depending on what steps are to be run, the relevant information (e.g. file paths or thresholds) is passed
from one to the next step via 'extra config'.
"""
Plan.print_logo()
Plan.print_environment()
print_logo()
print_environment()

# TODO add some logging here on the directories (if they are not logged elsewhere)

Expand Down Expand Up @@ -218,8 +217,12 @@ def _get_optimized_values_config(output_folder: Path) -> dict:
df = pd.read_csv(
output_folder / f"{SearchPlanOutput.STAT_OUTPUT}.tsv", sep="\t"
)
target_ms1_tolerance = np.nanmedian(df[f"{OPTIMIZATION_PREFIX}{MS1_ERROR}"])
target_ms2_tolerance = np.nanmedian(df[f"{OPTIMIZATION_PREFIX}{MS2_ERROR}"])
target_ms1_tolerance = np.nanmedian(
df[f"{OutputKeys.OPTIMIZATION_PREFIX}{OutputKeys.MS1_ERROR}"]
)
target_ms2_tolerance = np.nanmedian(
df[f"{OutputKeys.OPTIMIZATION_PREFIX}{OutputKeys.MS2_ERROR}"]
)

if np.isnan(target_ms1_tolerance) and np.isnan(target_ms2_tolerance):
logger.warning(
Expand Down
Empty file added alphadia/utilities/__init__.py
Empty file.
48 changes: 48 additions & 0 deletions alphadia/utilities/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import logging
import socket
from datetime import datetime
from importlib import metadata

import alphabase
import alpharaw
import alphatims
import directlfq
import peptdeep

import alphadia

logger = logging.getLogger()


def print_logo() -> None:
    """Print the alphadia logo and version."""
    # Raw string needed where the art contains backslashes.
    logo_lines = (
        " _ _ ___ ___ _ ",
        r" __ _| |_ __| |_ __ _| \_ _| /_\ ",
        " / _` | | '_ \\ ' \\/ _` | |) | | / _ \\ ",
        " \\__,_|_| .__/_||_\\__,_|___/___/_/ \\_\\",
        " |_| ",
        "",
    )
    for line in logo_lines:
        logger.progress(line)
    logger.progress(f"version: {alphadia.__version__}")

def print_environment() -> None:
    """Log information about the python environment."""

    logger.info(f"hostname: {socket.gethostname()}")
    timestamp = datetime.today().strftime("%Y-%m-%d %H:%M:%S")
    logger.info(f"date: {timestamp}")

    logger.info("================ AlphaX Environment ===============")
    # Display names and their backing modules differ for alphapeptdeep/peptdeep.
    alpha_packages = (
        ("alphatims", alphatims),
        ("alpharaw", alpharaw),
        ("alphabase", alphabase),
        ("alphapeptdeep", peptdeep),
        ("directlfq", directlfq),
    )
    for display_name, module in alpha_packages:
        logger.info(f"{display_name:<15} : {module.__version__}")
    logger.info("===================================================")

    logger.info("================= Pip Environment =================")
    installed = [
        f"{dist.metadata['Name']}=={dist.version}" for dist in metadata.distributions()
    ]
    logger.info(" ".join(installed))
    logger.info("===================================================")
2 changes: 2 additions & 0 deletions alphadia/workflow/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def __init__(
self._parent_path: str = quant_path or os.path.join(
config["output"], QUANT_FOLDER_NAME
)
logger.info(f"Saving quantification results to {self._parent_path}")

self._config: Config = config
self.reporter: reporting.Pipeline | None = None
self._dia_data: bruker.TimsTOFTranspose | alpharaw_wrapper.AlphaRaw | None = (
Expand Down
8 changes: 6 additions & 2 deletions alphadia/workflow/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,12 +494,16 @@ def __init__(
"fwhm_mobility": config["optimization_manager"]["fwhm_mobility"],
"score_cutoff": config["optimization_manager"]["score_cutoff"],
}
self.__dict__.update(initial_parameters)
self.__dict__.update(
mschwoer marked this conversation as resolved.
Show resolved Hide resolved
initial_parameters
) # TODO either store this as a dict or in individual instance variables

for key, value in initial_parameters.items():
self.reporter.log_string(f"initial parameter: {key} = {value}")

def fit(self, update_dict):
def fit(
self, update_dict
): # TODO siblings' implementations have different signatures
mschwoer marked this conversation as resolved.
Show resolved Hide resolved
"""Update the parameters dict with the values in update_dict."""
self.__dict__.update(update_dict)
self.is_fitted = True
Expand Down
Loading