Merge branch 'main' into equate_spikeglx_in_spikeinterface
h-mayorquin committed Dec 9, 2024
2 parents 71b3299 + 4b3172c commit a965d23
Showing 10 changed files with 207 additions and 15 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/deploy-tests.yml
@@ -69,6 +69,9 @@ jobs:
if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }}
uses: ./.github/workflows/live-service-testing.yml
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
S3_GIN_BUCKET: ${{ secrets.S3_GIN_BUCKET }}
DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }}
with: # Ternary operator: condition && value_if_true || value_if_false
python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }}
16 changes: 16 additions & 0 deletions .github/workflows/live-service-testing.yml
@@ -13,6 +13,12 @@ on:
type: string

secrets:
AWS_ACCESS_KEY_ID:
required: true
AWS_SECRET_ACCESS_KEY:
required: true
S3_GIN_BUCKET:
required: true
DANDI_API_KEY:
required: true

@@ -45,7 +51,17 @@ jobs:
- name: Install full requirements
run: pip install .[test,full]

- name: Prepare data for tests
uses: ./.github/actions/load-data
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
s3-gin-bucket: ${{ secrets.S3_GIN_BUCKET }}
os: ${{ matrix.os }}

- name: Run subset of tests that use DANDI live services
run: pytest -rsx -n auto tests/test_minimal/test_tools/dandi_transfer_tools.py
- name: Run subset of tests that use DANDI live services with YAML
run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py
- name: Run subset of tests that use Globus live services
run: pytest -rsx -n auto tests/test_minimal/test_tools/globus_transfer_tools.py
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -1,7 +1,8 @@
# v0.6.6 (Upcoming)

## Deprecations
* Completely removed compression settings from most places [PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126)
* Removed use of `jsonschema.RefResolver` as it will be deprecated from the jsonschema library [PR #1133](https://github.com/catalystneuro/neuroconv/pull/1133)
* Completely removed compression settings from most places[PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126)

## Bug Fixes
* datetime objects now can be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126)
@@ -15,6 +16,7 @@
* `SpikeGLXRecordingInterface` now also accepts `folder_path` making its behavior equivalent to SpikeInterface [#1150](https://github.com/catalystneuro/neuroconv/pull/1150)
* Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085)
* Added the `deploy_neuroconv_batch_job` helper function for deploying NeuroConv AWS Batch jobs. [PR #1086](https://github.com/catalystneuro/neuroconv/pull/1086)
* YAML specification files now accept an outer keyword `upload_to_dandiset="< six-digit ID >"` to automatically upload the produced NWB files to the DANDI archive [PR #1089](https://github.com/catalystneuro/neuroconv/pull/1089)


## Improvements
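As context for the jsonschema.RefResolver deprecation noted above and the "referencing" dependency added below, here is a minimal, self-contained sketch of the registry-based validation pattern this commit adopts; the two schemas are hypothetical stand-ins, not NeuroConv's actual schema files.

# Sketch only: hypothetical schemas illustrating referencing.Registry in place of jsonschema.RefResolver.
from jsonschema import validate
from referencing import Registry, Resource

metadata_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {"lab": {"type": "string"}},
}
specification_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {"metadata": {"$ref": "metadata_schema.json#"}},
}

# Register the referenced schema under the same name used in the $ref.
registry = Registry().with_resource("metadata_schema.json", Resource.from_contents(metadata_schema))

# The registry resolves the $ref, so the deprecated RefResolver is no longer needed.
validate(instance={"metadata": {"lab": "My Lab"}}, schema=specification_schema, registry=registry)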
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -50,7 +50,8 @@ dependencies = [
"parse>=1.20.0",
"click",
"docstring-parser",
"packaging" # Issue 903
"packaging", # Issue 903
"referencing",
]


src/neuroconv/schemas/yaml_conversion_specification_schema.json
@@ -8,6 +8,7 @@
"required": ["experiments"],
"additionalProperties": false,
"properties": {
"upload_to_dandiset": {"type": "string"},
"metadata": {"$ref": "./metadata_schema.json#"},
"conversion_options": {"type": "object"},
"data_interfaces": {
src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py
@@ -1,12 +1,15 @@
import sys
import json
import os
from importlib import import_module
from pathlib import Path
from typing import Optional

import click
from jsonschema import RefResolver, validate
from jsonschema import validate
from pydantic import DirectoryPath, FilePath
from referencing import Registry, Resource

from ..data_transfers import automatic_dandi_upload
from ...nwbconverter import NWBConverter
from ...utils import dict_deep_update, load_dict_from_file

@@ -50,7 +53,7 @@ def run_conversion_from_yaml(
data_folder_path: Optional[DirectoryPath] = None,
output_folder_path: Optional[DirectoryPath] = None,
overwrite: bool = False,
):
) -> None:
"""
Run conversion to NWB given a yaml specification file.
Expand Down Expand Up @@ -85,14 +88,29 @@ def run_conversion_from_yaml(

specification = load_dict_from_file(file_path=specification_file_path)
schema_folder = Path(__file__).parent.parent.parent / "schemas"

# Load all required schemas
specification_schema = load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json")
sys_uri_base = "file:/" if sys.platform.startswith("win32") else "file://"
metadata_schema = load_dict_from_file(file_path=schema_folder / "metadata_schema.json")

# The yaml specification references the metadata schema, so we need to load it into the registry
registry = Registry().with_resource("metadata_schema.json", Resource.from_contents(metadata_schema))

# Validate using the registry
validate(
instance=specification,
schema=specification_schema,
resolver=RefResolver(base_uri=sys_uri_base + str(schema_folder) + "/", referrer=specification_schema),
registry=registry,
)

upload_to_dandiset = "upload_to_dandiset" in specification
if upload_to_dandiset and "DANDI_API_KEY" not in os.environ:
message = (
"The 'upload_to_dandiset' prompt was found in the YAML specification, "
"but the environment variable 'DANDI_API_KEY' was not set."
)
raise ValueError(message)

global_metadata = specification.get("metadata", dict())
global_conversion_options = specification.get("conversion_options", dict())
data_interfaces_spec = specification.get("data_interfaces")
@@ -108,28 +126,55 @@
experiment_metadata = experiment.get("metadata", dict())
for session in experiment["sessions"]:
file_counter += 1

source_data = session["source_data"]
for interface_name, interface_source_data in session["source_data"].items():
for key, value in interface_source_data.items():
if key == "file_paths":
source_data[interface_name].update({key: [str(Path(data_folder_path) / x) for x in value]})
elif key in ("file_path", "folder_path"):
source_data[interface_name].update({key: str(Path(data_folder_path) / value)})

converter = CustomNWBConverter(source_data=source_data)

metadata = converter.get_metadata()
for metadata_source in [global_metadata, experiment_metadata, session.get("metadata", dict())]:
metadata = dict_deep_update(metadata, metadata_source)
nwbfile_name = session.get("nwbfile_name", f"temp_nwbfile_name_{file_counter}").strip(".nwb")

session_id = session.get("metadata", dict()).get("NWBFile", dict()).get("session_id", None)
if upload_to_dandiset and session_id is None:
message = (
"The 'upload_to_dandiset' prompt was found in the YAML specification, "
"but the 'session_id' was not found for session with info block: "
f"\n\n {json.dumps(obj=session, indent=2)}\n\n"
"File intended for DANDI upload must include a session ID."
)
raise ValueError(message)

session_conversion_options = session.get("conversion_options", dict())
conversion_options = dict()
for key in converter.data_interface_objects:
conversion_options[key] = dict(session_conversion_options.get(key, dict()), **global_conversion_options)

nwbfile_name = session.get("nwbfile_name", f"temp_nwbfile_name_{file_counter}").strip(".nwb")
converter.run_conversion(
nwbfile_path=output_folder_path / f"{nwbfile_name}.nwb",
metadata=metadata,
overwrite=overwrite,
conversion_options=conversion_options,
)

if upload_to_dandiset:
dandiset_id = specification["upload_to_dandiset"]
staging = int(dandiset_id) >= 200_000
automatic_dandi_upload(
dandiset_id=dandiset_id,
nwb_folder_path=output_folder_path,
staging=staging,
)

return None # We can early return since organization below will occur within the upload step

# To properly mimic a true dandi organization, the full directory must be populated with NWBFiles.
all_nwbfile_paths = [nwbfile_path for nwbfile_path in output_folder_path.iterdir() if nwbfile_path.suffix == ".nwb"]
nwbfile_paths_to_set = [
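For reference, a hypothetical invocation of the updated entry point above; the file names and folder paths are placeholders, and DANDI_API_KEY must be set in the environment for the new upload step to run.

# Hypothetical usage sketch; paths and the specification file name are placeholders.
import os
from pathlib import Path

from neuroconv import run_conversion_from_yaml

# The upload branch added above raises if this environment variable is missing.
assert os.environ.get("DANDI_API_KEY"), "Set DANDI_API_KEY before using 'upload_to_dandiset'."

run_conversion_from_yaml(
    specification_file_path=Path("conversion_specification.yml"),  # spec containing upload_to_dandiset: "200560"
    data_folder_path=Path("data/raw"),
    output_folder_path=Path("data/nwb"),
    overwrite=True,
)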
1 change: 1 addition & 0 deletions tests/imports.py
@@ -68,6 +68,7 @@ def test_tools(self):
"get_package_version",
"is_package_installed",
"deploy_process",
"data_transfers",
"LocalPathExpander",
"get_module",
]
tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml
@@ -0,0 +1,66 @@
metadata:
NWBFile:
lab: My Lab
institution: My Institution

conversion_options:
stub_test: True

data_interfaces:
ap: SpikeGLXRecordingInterface
lf: SpikeGLXRecordingInterface
phy: PhySortingInterface

upload_to_dandiset: "200560"

experiments:
ymaze:
metadata:
NWBFile:
session_description: Subject navigating a Y-shaped maze.

sessions:
- nwbfile_name: example_converter_spec_1
source_data:
ap:
file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin
metadata:
NWBFile:
session_start_time: "2020-10-09T21:19:09+00:00"
session_id: "test-yaml-1"
Subject:
subject_id: "yaml-1"
sex: F
age: P35D
species: Mus musculus
- nwbfile_name: example_converter_spec_2.nwb
metadata:
NWBFile:
session_start_time: "2020-10-10T21:19:09+00:00"
session_id: "test-yaml-2"
Subject:
subject_id: "yaml-002"
sex: F
age: P35D
species: Mus musculus
source_data:
lf:
file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin

open_explore:
sessions:
- nwbfile_name: example_converter_spec_3
source_data:
lf:
file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin
phy:
folder_path: phy/phy_example_0/
metadata:
NWBFile:
session_start_time: "2020-10-11T21:19:09+00:00"
session_id: test YAML 3
Subject:
subject_id: YAML Subject Name
sex: F
age: P35D
species: Mus musculus
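As a side note on the example above, a short sketch of the staging heuristic from the converter change earlier in this commit, applied to the dandiset ID used in this specification file.

# Mirrors `staging = int(dandiset_id) >= 200_000` from the converter change above (assumption: IDs at or
# above 200000 belong to the staging archive, consistent with the staging API URL used in the test below).
dandiset_id = "200560"  # value of upload_to_dandiset in this example specification
staging = int(dandiset_id) >= 200_000
print(staging)  # True -> upload targets https://api-staging.dandiarchive.org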
18 changes: 11 additions & 7 deletions tests/test_on_data/test_yaml/test_yaml_conversion_specification.py
@@ -1,12 +1,12 @@
import sys
import unittest
from datetime import datetime
from pathlib import Path

import pytest
from hdmf.testing import TestCase
from jsonschema import RefResolver, validate
from jsonschema import validate
from pynwb import NWBHDF5IO
from referencing import Registry, Resource

from neuroconv import run_conversion_from_yaml
from neuroconv.utils import load_dict_from_file
@@ -19,6 +19,7 @@
"fname",
[
"GIN_conversion_specification.yml",
"GIN_conversion_specification_dandi_upload.yml",
"GIN_conversion_specification_missing_nwbfile_names.yml",
"GIN_conversion_specification_no_nwbfile_name_or_other_metadata.yml",
"GIN_conversion_specification_videos.yml",
@@ -27,16 +28,19 @@
def test_validate_example_specifications(fname):
path_to_test_yml_files = Path(__file__).parent / "conversion_specifications"
schema_folder = path_to_test_yml_files.parent.parent.parent.parent / "src" / "neuroconv" / "schemas"

# Load schemas
specification_schema = load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json")
sys_uri_base = "file://"
if sys.platform.startswith("win32"):
sys_uri_base = "file:/"
metadata_schema = load_dict_from_file(file_path=schema_folder / "metadata_schema.json")

# The yaml specification references the metadata schema, so we need to load it into the registry
registry = Registry().with_resource("metadata_schema.json", Resource.from_contents(metadata_schema))

yaml_file_path = path_to_test_yml_files / fname
validate(
instance=load_dict_from_file(file_path=yaml_file_path),
schema=load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json"),
resolver=RefResolver(base_uri=sys_uri_base + str(schema_folder) + "/", referrer=specification_schema),
schema=specification_schema,
registry=registry,
)


53 changes: 53 additions & 0 deletions tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py
@@ -0,0 +1,53 @@
import os
import platform
import time
from datetime import datetime, timedelta
from pathlib import Path

import dandi.dandiapi
import pytest
from packaging.version import Version

from neuroconv import run_conversion_from_yaml

from ..setup_paths import ECEPHY_DATA_PATH, OUTPUT_PATH

DANDI_API_KEY = os.getenv("DANDI_API_KEY")
HAVE_DANDI_KEY = DANDI_API_KEY is not None and DANDI_API_KEY != "" # can be "" from external forks
_PYTHON_VERSION = platform.python_version()


@pytest.mark.skipif(
not HAVE_DANDI_KEY or Version(".".join(_PYTHON_VERSION.split(".")[:2])) != Version("3.12"),
reason="You must set your DANDI_API_KEY to run this test!",
)
def test_run_conversion_from_yaml_with_dandi_upload():
path_to_test_yml_files = Path(__file__).parent / "conversion_specifications"
yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml"
run_conversion_from_yaml(
specification_file_path=yaml_file_path,
data_folder_path=ECEPHY_DATA_PATH,
output_folder_path=OUTPUT_PATH,
overwrite=True,
)

time.sleep(60) # Give some buffer room for server to process before making assertions against DANDI API

client = dandi.dandiapi.DandiAPIClient(api_url="https://api-staging.dandiarchive.org/api")
dandiset = client.get_dandiset("200560")

expected_asset_paths = [
"sub-yaml-1/sub-yaml-1_ses-test-yaml-1_ecephys.nwb",
"sub-yaml-002/sub-yaml-002_ses-test-yaml-2_ecephys.nwb",
"sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-YAML-3_ecephys.nwb",
]
for asset_path in expected_asset_paths:
test_asset = dandiset.get_asset_by_path(path=asset_path) # Will error if not found
test_asset_metadata = test_asset.get_raw_metadata()

# Past uploads may have created the same apparent file, so look at the modification time to ensure
# this test is actually testing the most recent upload
date_modified = datetime.fromisoformat(
test_asset_metadata["dateModified"].split("Z")[0] # Timezones look a little messy
)
assert datetime.now() - date_modified < timedelta(minutes=10)
