diff --git a/.github/workflows/docs_build_and_deploy.yml b/.github/workflows/docs_build_and_deploy.yml
new file mode 100644
index 0000000..f9e3a5c
--- /dev/null
+++ b/.github/workflows/docs_build_and_deploy.yml
@@ -0,0 +1,46 @@
+name: Docs
+
+# Generate the documentation on all merges to main, all pull requests, or by
+# manual workflow dispatch. The build job can be used as a CI check that the
+# docs still build successfully. The deploy job only runs when a tag is
+# pushed and actually moves the generated html to the gh-pages branch
+# (which triggers a GitHub pages deployment).
+on:
+  push:
+    branches:
+      - main
+    tags:
+      - '*'
+  pull_request:
+  merge_group:
+  workflow_dispatch:
+
+jobs:
+  linting:
+    # scheduled workflows should not run on forks
+    if: (${{ github.event_name == 'schedule' }} && ${{ github.repository_owner == 'neuroinformatics-unit' }} && ${{ github.ref == 'refs/heads/main' }}) || (${{ github.event_name != 'schedule' }})
+    runs-on: ubuntu-latest
+    steps:
+      - uses: neuroinformatics-unit/actions/lint@v2
+
+  build_sphinx_docs:
+    name: Build Sphinx Docs
+    runs-on: ubuntu-latest
+    steps:
+      - uses: neuroinformatics-unit/actions/build_sphinx_docs@main
+        with:
+          python-version: 3.11
+          use-make: true
+
+  deploy_sphinx_docs:
+    name: Deploy Sphinx Docs
+    needs: build_sphinx_docs
+    permissions:
+      contents: write
+    if: (github.event_name == 'push' && github.ref_type == 'tag') || github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: neuroinformatics-unit/actions/deploy_sphinx_docs@main
+        with:
+          secret_input: ${{ secrets.GITHUB_TOKEN }}
+          use-make: true
diff --git a/README.md b/README.md
index 5766261..211aeec 100644
--- a/README.md
+++ b/README.md
@@ -1,229 +1,13 @@
-> **Warning**
-> **Spikewrap is not sufficiently tested to be used in analysis. This release is only for testing. Do not use for your final analyses.**
+# spikewrap
 
-> **Warning** **Limitations**
-> - works only on SpikeGLX recordings with 1 gate, 1 trigger, 1 probe (per run, e.g. g0, t0, imec0)
-> - requires standard input folder format
-> - only run one subject / run at a time
-> - has limited preprocessing options (`tshift`, `bandpass_filter`, `common median reference`)
-> - no options to remove potentially large intermediate files
-> - installation / running on HPC is a bit clunky. In future this can be simplified with SLURM jobs organised under the hood and setting up a HPC module.
-> - untested!
-> - The documentation is currently outdated.
+``spikewrap`` is a tool for automating extracellular electrophysiology analysis.
+See the documentation for a
+[1-minute introduction]()
+and to
+[get started]().
 
-# Features
-- preprocess SpikeGLX data (`tshift`, `bandpass_filter`, `common median reference`)
-- spike sorting (`kilosort2`, `kilosort2_5`, `kilosort3`)
-- quality check measures on the sorting results
+## Installation
 
-# Local Installation
-
-Sorting requires a NVIDIA GPU and so is currently only available using the SWC's High-Performance Computer (HPC). However, local installation is useful for visualising the preprocessing steps prior to running the full pipeline (see 'Visualisation' below).
-
-To install locally, clone the repository to your local machine using git.
-
-`git clone git@github.com:neuroinformatics-unit/spikewrap.git`
-
-Change directory to the repo and install using
-
-`pip install -e .`
-
-or, to also install developer dependencies
-
-`pip install -e .[dev]`
-
-or if using the zsh shell
-
-`pip install -e ".[dev]"`
-
-After installation, the module can be imported with `import spikewrap`.
-
-## Running on the HPC
-
-Currently, sorting is required to run on the SWC HPC with access to `/ceph/neuroinformatics`.
-
-To connect and run on the HPC (e.g. from Windows, macOS or Linux terminal):
-
-`ssh username@ssh.swc.ucl.ac.uk`
-
-`ssh hpc-gw1`
-
-The first time using, it is necessary to steup and install `spikewrap`. It is strongly recommended to make a new conda environment on the HPC, before installing `spikewrap`.
-
-`module load miniconda`
-
-`conda create --name spikewrap python=3.10`
-
-`conda activate spikewrap`
-
-and install spikewrap and it's dependencies:
-
-`mkdir ~/git-repos`
-
-`cd ~/git-repos`
-
-`git clone https://github.com/JoeZiminski/spikewrap.git`
-
-`cd spikewrap`
-
-`pip install -e .`
-
-Before running, it is necessary to request use of a GPU node on the HPC to run spike sorting with KiloSort. To run preprocessing and spike sorting, create a script using the API or call from the command line interface (instructions below).
-
-`srun -p gpu --gres=gpu:1 -n 8 --mem=40GB --pty bash -i`
-
-`module load cuda`
-
-`module load miniconda`
-
-`conda activate spikewrap`
-
-`python my_pipeline_script.py`
-
-# Quick Start Guide
-
-Spikewrap (currently) expects input data to be stored in a `rawdata` folder. A subject (e.g. mouse) data should be stored in the `rawdata` folder and contain SpikeGLX output format (example below). **Currently, only recordings with 1 gate, 1 trigger and 1 probe are supported (i.e. index 0 for all gate, trigger probe, `g0`, `t0` and `imec0`)**.
-
-```
-└── rawdata/
-    └── 1110925/
-        └── 1110925_test_shank1_g0/
-            └── 1110925_test_shank1_g0_imec0/
-                ├── 1110925_test_shank1_g0_t0.imec0.ap.bin
-                └── 1110925_test_shank1_g0_t0.imec0.ap.meta
-```
-
-
-## API (script)
-
-Example code to analyse this data in this format is below:
-
-```
-from spikewrap.pipeline.full_pipeline import run_full_pipeline
-
-base_path = "/ceph/neuroinformatics/neuroinformatics/scratch/ece_ephys_learning"
-
-if __name__ == "__main__":
-
-    run_full_pipeline(
-        base_path=base_path,
-        sub_name="sub-001",
-        run_name="ses-001_condition-lse",
-        config_name="test",
-        sorter="kilosort2_5",
-    )
-```
-
-`base_path` is the path containing the required `rawdata` folder.
-
-`sub_name` is the subject to run, and `run_name` is the SpikeGLX run name to run.
-
-`configs_name` contains the name of the preprocessing / sorting settings to use (see below)
-
-`sorter` is the name of the sorter to use (currently supported is `kilosort2`, `kilosort2_5` and `kilosort3`)
-
-Note `run_full_pipline` must be run in the `if __name__ == "__main__"` block as it uses the `multiprocessing` module.
-
-## Output
-
-Output of spike sorting will be in a `derivatives` folder at the same level as the `rawdata`. The subfolder organisation of `derivatives` will match `rawdata`.
-
-Output are the saved preprocessed data, spike sorting results as well as a list of [quality check measures](https://spikeinterface.readthedocs.io/en/latest/modules/qualitymetrics.html). For example, the full output of a sorting run with the input data as above is:
-
-```
-├── rawdata/
-│   └── ...
-
-└── derivatives/
-    └── 1110925/
-        └── 1110925_test_shank1_g0 /
-            └── 1110925_test_shank1_g0_imec0/
-                ├── preprocessed/
-                │   ├── data_class.pkl
-                │   └── si_recording
-                ├── kilosort2_5-sorting/
-                    ├── in_container_sorting/
-                    ├── sorter_output/
-                    ├── waveforms/
-                    │   └──
-                    ├── quality_metrics.csv
-                    ├── spikeinterface_log.json
-                    ├── spikeinterface_params.json
-                    └── spikeinterface_recording.json
-```
-
-
-**preprocessed**:
-
-- Binary-format spikeinterface recording from the final preprocessing step (`si_recording`) 2) `data_class.pkl` spikewrap internal use.
-
-**-sorting output (e.g. kilosort2_5-sorting, multiple sorters can be run)**:
-
-- in_container_sorting: stored options used to run the sorter
-
-- sorter_output: the full output of the sorter (e.g. kilosort .npy files)
-
-- waveforms: spikeinterface [waveforms](https://spikeinterface.readthedocs.io/en/latest/modules/core.html#waveformextractor) output containing AP
-waveforms for detected spikes
-
-- quality_metrics.csv: output of spikeinterface [quality check measures](https://spikeinterface.readthedocs.io/en/latest/modules/qualitymetrics.html)
-
-# Set Preprocessing Options
-
-Currently supported are multiplexing correction or tshift (termed `phase shift` here), common median referencing (CMR) (termed `common_reference` here) and bandpass filtering (`bandpass_filter`). These options provide an interface to [SpikeInterface preprocessing](https://spikeinterface.readthedocs.io/en/0.13.0/modules/toolkit/plot_1_preprocessing.html) options, more will be added soon.
-
-Preprocessing options are set in `yaml` configuration files stored in `sbi_ephys/sbi_ephys/configs/`. A default pipeline is stored in `test.yaml`.
-
-Custom preprocessing configuration files may be passed to the `config_name` argument, by passing the full path to the `.yaml` configuration file. For example:
-
-```
-'preprocessing':
-  '1':
-  - phase_shift
-  - {}
-  '2':
-  - bandpass_filter
-  - freq_min: 300
-    freq_max: 6000
-  '3':
-  - common_reference
-  - operator: median
-    reference: global
-
-'sorting':
-  'kilosort3':
-    'car': False
-    'freq_min': 300
-```
-
-Configuration files are structured as a dictionary where keys indicate the order to run preprocessing The values hold a list in which the first element is the name of the preprocessing step to run, and the second element a dictionary containing kwargs passed to the spikeinterface function.
-
-# Visualise Preprocessing
-
-Visualising preprocesing output can be run locally to inspect output of preprocessing routines. To visualise preprocessing outputs:
-
-```
-from spikewrap.pipeline.preprocess import preprocess
-from spikewrap.pipeline.visualise import visualise
-
-base_path = "/ceph/neuroinformatics/neuroinformatics/scratch/ece_ephys_learning"
-sub_name = "1110925"
-run_name = "1110925_test_shank1"
-
-data = preprocess(base_path=base_path, sub_name=sub_name, run_name=run_name)
-
-visualise(
-    data,
-    steps="all",
-    mode="map",
-    as_subplot=True,
-    channel_idx_to_show=np.arange(10, 50),
-    show_channel_ids=False,
-    time_range=(1, 2),
-)
-```
-
-This will display a plot showing data from all preprocessing steps, displaying channels with idx 10 - 50, over time period 1-2. Note this requires a GUI (i.e. not run on the HPC terminal) and is best run locally.
-
-![plot](./readme_image.png)
+``pip install spikewrap``
\ No newline at end of file
diff --git a/docs/source/sg_execution_times.rst b/docs/source/sg_execution_times.rst
index cdf7c85..7b65fb4 100644
--- a/docs/source/sg_execution_times.rst
+++ b/docs/source/sg_execution_times.rst
@@ -6,7 +6,7 @@
 Computation times
 =================
 
-**00:22.634** total execution time for 7 files **from all galleries**:
+**00:21.500** total execution time for 7 files **from all galleries**:
 
 .. container::
 
@@ -33,23 +33,23 @@ Computation times
      - Time
      - Mem (MB)
    * - :ref:`sphx_glr_gallery_builds_how_to_01_preprocess_a_session.py` (``galleries\how_to\01_preprocess_a_session.py``)
-     - 00:09.089
-     - 0.0
-   * - :ref:`sphx_glr_gallery_builds_get_started_package_overview.py` (``galleries\get_started\package_overview.py``)
-     - 00:07.144
+     - 00:08.829
      - 0.0
    * - :ref:`sphx_glr_gallery_builds_tutorials_01_preprocessing_sessions.py` (``galleries\tutorials\01_preprocessing_sessions.py``)
-     - 00:06.383
+     - 00:06.401
      - 0.0
-   * - :ref:`sphx_glr_gallery_builds_how_to_02_manage_configs.py` (``galleries\how_to\02_manage_configs.py``)
-     - 00:00.011
+   * - :ref:`sphx_glr_gallery_builds_get_started_package_overview.py` (``galleries\get_started\package_overview.py``)
+     - 00:06.244
      - 0.0
    * - :ref:`sphx_glr_gallery_builds_tutorials_02_managing_configs.py` (``galleries\tutorials\02_managing_configs.py``)
-     - 00:00.008
+     - 00:00.019
      - 0.0
-   * - :ref:`sphx_glr_gallery_builds_how_to_03_run_in_slurm.py` (``galleries\how_to\03_run_in_slurm.py``)
-     - 00:00.000
+   * - :ref:`sphx_glr_gallery_builds_how_to_02_manage_configs.py` (``galleries\how_to\02_manage_configs.py``)
+     - 00:00.005
      - 0.0
    * - :ref:`sphx_glr_gallery_builds_tutorials_03_running_with_slurm.py` (``galleries\tutorials\03_running_with_slurm.py``)
-     - 00:00.000
+     - 00:00.001
+     - 0.0
+   * - :ref:`sphx_glr_gallery_builds_how_to_03_run_in_slurm.py` (``galleries\how_to\03_run_in_slurm.py``)
+     - 00:00.001
      - 0.0
diff --git a/spikewrap/structure/_run.py b/spikewrap/structure/_run.py
index 4ae550c..a963c53 100644
--- a/spikewrap/structure/_run.py
+++ b/spikewrap/structure/_run.py
@@ -50,26 +50,8 @@ def __init__(
         # the lifetime of the class, by this class only.
         self._raw: dict = {}
         self._preprocessed: dict = {}
-        self._sync = None
 
-    # TODO: I think just remove these...? this is not a public class, very confusing..
-    @property
-    def parent_input_path(self) -> Path | None:
-        return self._parent_input_path
-
-    @property
-    def run_name(self) -> str:
-        return self._run_name
-
-    @property
-    def output_path(self) -> Path:
-        return self._output_path
-
-    @property
-    def file_format(self) -> Literal["spikeglx", "openephys"]:
-        return self._file_format
-
     # ---------------------------------------------------------------------------
     # Public Functions
     # ---------------------------------------------------------------------------
@@ -107,7 +89,7 @@ def preprocess(self, pp_steps: dict, per_shank: bool) -> None:
             rec_name = f"shank_{key}" if key != canon.grouped_shankname() else key
 
             self._preprocessed[key] = Preprocessed(
-                raw_rec, pp_steps, self.output_path, rec_name
+                raw_rec, pp_steps, self._output_path, rec_name
             )
 
     def save_preprocessed(
@@ -118,17 +100,17 @@ def save_preprocessed(
             self._save_preprocessed_slurm(overwrite, chunk_size, n_jobs, slurm)
             return
 
-        _utils.message_user(f"Saving data for: {self.run_name}...")
+        _utils.message_user(f"Saving data for: {self._run_name}...")
 
         if n_jobs != 1:
            si.set_global_job_kwargs(n_jobs=n_jobs)
 
-        if self.output_path.is_dir():  # getter func?
+        if self._output_path.is_dir():  # getter func?
             if overwrite:
-                self._delete_existing_run_except_slurm_logs(self.output_path)
+                self._delete_existing_run_except_slurm_logs(self._output_path)
             else:
                 raise RuntimeError(
-                    f"`overwrite` is `False` but data already exists at the run path: {self.output_path}."
+                    f"`overwrite` is `False` but data already exists at the run path: {self._output_path}."
                 )
 
         self._save_sync_channel()
 
@@ -165,7 +147,7 @@ def plot_preprocessed(
             raise RuntimeError("Preprocessing has not been run.")
 
         fig = visualise_run_preprocessed(
-            self.run_name,
+            self._run_name,
             show,
             self._preprocessed,
             mode=mode,
@@ -189,7 +171,7 @@ def _split_by_shank(self) -> None:
         """ """
         assert not self._is_split_by_shank(), (
             f"Attempting to split by shank, but the recording"
-            f"in run: {self.run_name} has already been split."
+            f"in run: {self._run_name} has already been split."
            f"This should not happen. Please contact the spikewrap team."
         )
 
@@ -197,14 +179,14 @@ def _split_by_shank(self) -> None:
             "group"
         ) is None:
             raise ValueError(
-                f"Cannot split run {self.run_name} by shank as there is no 'group' property."
+                f"Cannot split run {self._run_name} by shank as there is no 'group' property."
             )
 
         self._raw = recording.split_by("group")
         self._raw = {str(key): value for key, value in self._raw.items()}
 
         _utils.message_user(
-            f"Split run: {self.run_name} by shank. There are {len(self._raw)} shanks. "
+            f"Split run: {self._run_name} by shank. There are {len(self._raw)} shanks. "
         )
 
     def _save_preprocessed_slurm(
@@ -222,7 +204,7 @@ def _save_preprocessed_slurm(
                 "n_jobs": n_jobs,
                 "slurm": False,
             },
-            log_base_path=self.output_path,
+            log_base_path=self._output_path,
         )
 
     def _save_sync_channel(self) -> None:
@@ -231,9 +213,9 @@ def _save_sync_channel(self) -> None:
         if the sync channel is loaded to ensure it does not interfere with sorting.
         As such, the sync channel is handled separately here.
         """
-        sync_output_path = self.output_path / canon.sync_folder()
+        sync_output_path = self._output_path / canon.sync_folder()
 
-        _utils.message_user(f"Saving sync channel for: {self.run_name}...")
+        _utils.message_user(f"Saving sync channel for: {self._run_name}...")
 
         if self._sync:
             _saving.save_sync_channel(self._sync, sync_output_path, self._file_format)
@@ -280,7 +262,7 @@ def load_raw_data(self, internal_overwrite: bool = False) -> None:
             raise RuntimeError("Cannot overwrite Run().")
 
         without_sync, with_sync = _loading.load_data(
-            self.parent_input_path / self.run_name, self._file_format
+            self._parent_input_path / self._run_name, self._file_format
         )
 
         self._raw = {canon.grouped_shankname(): without_sync}
@@ -356,7 +338,7 @@ def save_preprocessed(
         """
         super().save_preprocessed(overwrite, chunk_size, n_jobs, slurm)
 
-        with open(self.output_path / "orig_run_names.txt", "w") as f:
+        with open(self._output_path / "orig_run_names.txt", "w") as f:
             f.write("\n".join(self.orig_run_names))
 
     def _check_and_format_recordings_to_concat(
@@ -386,7 +368,7 @@ def _check_and_format_recordings_to_concat(
             raw_data.append(run._raw)
             sync_data.append(run._sync)
-            orig_run_names.append(run.run_name)
+            orig_run_names.append(run._run_name)
 
         assert all(
             list(dict_.keys()) == [canon.grouped_shankname()] for dict_ in raw_data
@@ -404,13 +386,13 @@ def _check_and_format_recordings_to_concat(
         ):
             raise RuntimeError(
                 f"Cannot concatenate recordings with different channel organisation."
-                f"This occurred for runs in folder: {self.parent_input_path}"
+                f"This occurred for runs in folder: {self._parent_input_path}"
             )
 
         if not np.unique(all_sampling_frequency).size == 1:
             raise RuntimeError(
                 f"Cannot concatenate recordings with different sampling frequencies."
-                f"This occurred for runs in folder: {self.parent_input_path}"
+                f"This occurred for runs in folder: {self._parent_input_path}"
             )
 
         return raw_data, sync_data, orig_run_names
diff --git a/spikewrap/structure/session.py b/spikewrap/structure/session.py
index 0aec70a..2510714 100644
--- a/spikewrap/structure/session.py
+++ b/spikewrap/structure/session.py
@@ -84,31 +84,6 @@ def __init__(
         self._runs: list[SeparateRun | ConcatRun] = []
         self._create_run_objects()
 
-    @property
-    def passed_run_names(self) -> Literal["all"] | list[str]:
-        """ """
-        return self._passed_run_names
-
-    @property
-    def file_format(self) -> Literal["spikeglx", "openephys"]:
-        """ """
-        return self._file_format
-
-    @property
-    def parent_input_path(self) -> Path:
-        """ """
-        return self._parent_input_path
-
-    @property
-    def ses_name(self) -> str:
-        """ """
-        return self._ses_name
-
-    @property
-    def output_path(self) -> Path:
-        """ """
-        return self._output_path
-
     # ---------------------------------------------------------------------------
     # Public Functions
     # ---------------------------------------------------------------------------
@@ -264,11 +239,11 @@ def plot_preprocessed(
                 figsize=figsize,
             )
 
-            all_figs[run.run_name] = fig
+            all_figs[run._run_name] = fig
 
         return all_figs
 
-    # Helpers -----------------------------------------------------------------
+    # Getters -----------------------------------------------------------------
 
     def get_run_names(self) -> list[str]:
         """
@@ -278,7 +253,16 @@ def get_run_names(self) -> list[str]:
         list will be the order of concatenation. If concatenation was
         already performed, the run name will be ``"concat_run"``.
         """
-        return [run.run_name for run in self._runs]
+        return [run._run_name for run in self._runs]
+
+    def parent_input_path(self) -> Path:  # TODO: add docs
+        return self._parent_input_path
+
+    def get_passed_run_names(self) -> Literal["all"] | list[str]:
+        return self._passed_run_names
+
+    def get_output_path(self):
+        return self._output_path
 
     # ---------------------------------------------------------------------------
     # Private Functions
@@ -300,16 +284,16 @@ def _create_run_objects(self, internal_overwrite: bool = False) -> None:
             Safety flag to ensure overwriting existing runs is intended.
""" if self._runs and not internal_overwrite: - raise RuntimeError(f"Cannot overwrite _runs for session {self.ses_name}") + raise RuntimeError(f"Cannot overwrite _runs for session {self._ses_name}") session_path = ( - self.parent_input_path / self.ses_name + self._parent_input_path / self._ses_name ) # will not include "ephys" run_paths = _loading.get_run_paths( - self.file_format, + self._file_format, session_path, - self.passed_run_names, + self._passed_run_names, ) runs: list[SeparateRun] = [] @@ -319,7 +303,7 @@ def _create_run_objects(self, internal_overwrite: bool = False) -> None: SeparateRun( parent_input_path=run_path.parent, # may include "ephys" if NeuroBlueprint run_name=run_path.name, - session_output_path=self.output_path, + session_output_path=self._output_path, file_format=self._file_format, ) ) @@ -350,8 +334,8 @@ def _concat_runs(self) -> None: self._runs = [ ConcatRun( self._runs, # type: ignore - self.parent_input_path, - self.output_path, + self._parent_input_path, + self._output_path, self._file_format, ) ] @@ -381,11 +365,11 @@ def _output_from_parent_input_path(self) -> Path: raise ValueError( f"Cannot infer `output_path` from non-NeuroBlueprint " f"folder structure (expected 'rawdata'->subject->session\n" - f"in path {self.parent_input_path}\n" + f"in path {self._parent_input_path}\n" f"Pass the session output folder explicitly as `output_path`." ) - return rawdata_path.parent / "derivatives" / sub_name / self.ses_name / "ephys" + return rawdata_path.parent / "derivatives" / sub_name / self._ses_name / "ephys" # Checkers ------------------------------------------------------------------ @@ -397,7 +381,7 @@ def _resolve_subject_input_path(self) -> tuple[Path, str]: This is true whether in NeuroBlueprint or other accepted folder formats found in the documentation. """ - sub_path = self.parent_input_path + sub_path = self._parent_input_path sub_name = sub_path.name return sub_path, sub_name