From 4d86c389e0471cff433982cab7bd3ccaa9215ede Mon Sep 17 00:00:00 2001
From: Aliaksei Chareshneu
Date: Tue, 1 Oct 2024 10:14:30 +0200
Subject: [PATCH 1/3] draft for mvs support (unfinished)

---
 .gitignore                 |   3 +
 vs_toolkit/extra_models.py |   4 +
 vs_toolkit/vs_toolkit.py   | 221 +++++++++++++++++++++----------------
 3 files changed, 131 insertions(+), 97 deletions(-)
 create mode 100644 vs_toolkit/extra_models.py

diff --git a/.gitignore b/.gitignore
index d9cb24a2..1901fb33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 ### Custom
+working_folder/*
+preprocessor/cellstar_preprocessor/tools/open_wrl_file/*
 molstar-extension/site/*
 molstar-extension/node_modules/
 molstar-extension/debug.log
@@ -317,3 +319,4 @@ dmypy.json

 # Cython debug symbols
 cython_debug/
+working_folder/custom-actin/.zattrs

diff --git a/vs_toolkit/extra_models.py b/vs_toolkit/extra_models.py
new file mode 100644
index 00000000..0ac08859
--- /dev/null
+++ b/vs_toolkit/extra_models.py
@@ -0,0 +1,4 @@
+import molviewspec
+
+class MVSJ:
+    pass
\ No newline at end of file

diff --git a/vs_toolkit/vs_toolkit.py b/vs_toolkit/vs_toolkit.py
index 211b1321..5526908e 100644
--- a/vs_toolkit/vs_toolkit.py
+++ b/vs_toolkit/vs_toolkit.py
@@ -6,6 +6,7 @@ from pathlib import Path
 from typing import Literal, Optional, Protocol, TypedDict, Union
 from zipfile import ZIP_DEFLATED, ZipFile

+from molviewspec.builder import create_builder
 from cellstar_db.file_system.db import FileSystemVolumeServerDB
 from cellstar_db.models import Metadata, TimeInfo
@@ -118,12 +119,13 @@ class JsonQueryParams(TypedDict):
     detail_lvl: Optional[int]
     max_points: Optional[int]

+OutputFormatsT = Literal['mvsx', 'cvsx']

 class ParsedArgs(TypedDict):
     db_path: Path
     out: Path
     json_params_path: Path
-
+    format: OutputFormatsT

 def _parse_argparse_args(args: argparse.Namespace):
     # TODO: validate similar to query app
@@ -131,6 +133,7 @@
         db_path=Path(args.db_path),
         out=Path(args.out),
         json_params_path=Path(args.json_params_path),
+        format=str(args.format)
     )
@@ -301,11 +304,26 @@ def _get_volume_timeframes_from_metadata(grid_metadata: Metadata):
     return list(range(start, end + 1))


+def _to_mvsx(responses: list[QueryResponse], indexJson: CVSXFilesIndex):
+    """
+    Only works for a volume query
+    """
+    builder = create_builder()
+    # TODO: replacement for download?
+    (
+        # TODO: local url in MVSX archive
+        builder.download(url='https://raw.githack.com/molstar/molstar-volseg/master/test-data/preprocessor/sample_volumes/emdb/EMD-1832.map')
+        .parse(format='map')
+        .raw_volume(source='map')
+        .volume_representation(type="isosurface")
+        .color(color='aqua')
+        # .color_from_uri(schema='volume', uri='./_examples/_input/volseg_example_annotations.json', format='json')
+        # .transparency(transparency=0.4)
+    )

-def _write_to_file(responses: list[QueryResponse], out_path: Path):
+def _write_to_file(responses: list[QueryResponse], out_path: Path, format: OutputFormatsT):
     # TODO: add here index.json with data on each file
-    # should be similar to create in memory zip
     file = io.BytesIO()
     indexJson: CVSXFilesIndex = {
@@ -317,99 +335,106 @@
         # 'geometricSegmentations': [],
         "query": None,
     }
-    with ZipFile(file, "w", ZIP_DEFLATED) as zip_file:
-        for r in responses:
-            response = r.response
-            type = r.type
-            input_data = r.input_data
-
-            if type == "volume":
-                # name should be created based on type and input data
-                channel_id = input_data["channel_id"]
-                time = input_data["time"]
-                name = f"{type}_{channel_id}_{time}.bcif"
-                zip_file.writestr(name, response)
-                info: VolumeFileInfo = {
-                    "channelId": channel_id,
-                    "timeframeIndex": time,
-                    "type": type,
-                }
-                if not "volumes" in indexJson:
-                    indexJson["volumes"] = {}
-
-                indexJson["volumes"][name] = info
-
-            elif type == "lattice":
-                segmentation_id = input_data["segmentation_id"]
-                time = input_data["time"]
-                name = f"{type}_{segmentation_id}_{time}.bcif"
-
-                info: LatticeSegmentationFileInfo = {
-                    "timeframeIndex": time,
-                    "type": type,
-                    "segmentationId": segmentation_id,
-                }
-                if not "latticeSegmentations" in indexJson:
-                    indexJson["latticeSegmentations"] = {}
-
-                indexJson["latticeSegmentations"][name] = info
-
-                zip_file.writestr(name, response)
-            elif type == "mesh":
-                # how to include segmentation id here?
-                segmentation_id = input_data["segmentation_id"]
-                time = input_data["time"]
-                meshes: list[str, bytes] = response
-                filenames = []
-                for segment_id, content in meshes:
-                    filename = f"{type}_{segment_id}_{segmentation_id}_{time}.bcif"
-                    filenames.append(filename)
-                    zip_file.writestr(filename, content)
-
-                info: MeshSegmentationFilesInfo = {
-                    "segmentationId": segmentation_id,
-                    "timeframeIndex": time,
-                    "segmentsFilenames": filenames,
-                    "type": type,
-                }
-
-                if not "meshSegmentations" in indexJson:
-                    indexJson["meshSegmentations"] = []
-
-                indexJson["meshSegmentations"].append(info)
-
-            elif type == "annotations" or type == "metadata" or type == "query":
-                name = f"{type}.json"
-                dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
-                zip_file.writestr(name, data=dumped_JSON)
-                indexJson[type] = name
-            # TODO: change geometric-segmentation
-            elif type == "geometric-segmentation":
-                segmentation_id = input_data["segmentation_id"]
-                time = input_data["time"]
-                name = f"{type}_{segmentation_id}_{time}.json"
-                dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
-                zip_file.writestr(name, data=dumped_JSON)
-
-                info: GeometricSegmentationFileInfo = {
-                    "segmentationId": segmentation_id,
-                    "timeframeIndex": time,
-                    "type": type,
-                }
-                if not "geometricSegmentations" in indexJson:
-                    indexJson["geometricSegmentations"] = {}
-
-                indexJson["geometricSegmentations"][name] = info
-
-        dumped_index_JSON: str = json.dumps(indexJson, ensure_ascii=False, indent=4)
-        zip_file.writestr(INDEX_JSON_FILENAME, data=dumped_index_JSON)
-
-    # print(indexJson)
-    zip_data = file.getvalue()
-
-    with open(str(out_path.resolve()), "wb") as f:
-        f.write(zip_data)
-
+
+    match format:
+        case 'cvsx':
+            with ZipFile(file, "w", ZIP_DEFLATED) as zip_file:
+                for r in responses:
+                    response = r.response
+                    type = r.type
+                    input_data = r.input_data
+
+                    if type == "volume":
+                        # name should be created based on type and input data
+                        channel_id = input_data["channel_id"]
+                        time = input_data["time"]
+                        name = f"{type}_{channel_id}_{time}.bcif"
+                        zip_file.writestr(name, response)
+                        info: VolumeFileInfo = {
+                            "channelId": channel_id,
+                            "timeframeIndex": time,
+                            "type": type,
+                        }
+                        if not "volumes" in indexJson:
+                            indexJson["volumes"] = {}
+
+                        indexJson["volumes"][name] = info
+
+                    elif type == "lattice":
+                        segmentation_id = input_data["segmentation_id"]
+                        time = input_data["time"]
+                        name = f"{type}_{segmentation_id}_{time}.bcif"
+
+                        info: LatticeSegmentationFileInfo = {
+                            "timeframeIndex": time,
+                            "type": type,
+                            "segmentationId": segmentation_id,
+                        }
+                        if not "latticeSegmentations" in indexJson:
+                            indexJson["latticeSegmentations"] = {}
+
+                        indexJson["latticeSegmentations"][name] = info
+
+                        zip_file.writestr(name, response)
+                    elif type == "mesh":
+                        # how to include segmentation id here?
+                        segmentation_id = input_data["segmentation_id"]
+                        time = input_data["time"]
+                        meshes: list[str, bytes] = response
+                        filenames = []
+                        for segment_id, content in meshes:
+                            filename = f"{type}_{segment_id}_{segmentation_id}_{time}.bcif"
+                            filenames.append(filename)
+                            zip_file.writestr(filename, content)
+
+                        info: MeshSegmentationFilesInfo = {
+                            "segmentationId": segmentation_id,
+                            "timeframeIndex": time,
+                            "segmentsFilenames": filenames,
+                            "type": type,
+                        }
+
+                        if not "meshSegmentations" in indexJson:
+                            indexJson["meshSegmentations"] = []
+
+                        indexJson["meshSegmentations"].append(info)
+
+                    elif type == "annotations" or type == "metadata" or type == "query":
+                        name = f"{type}.json"
+                        dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
+                        zip_file.writestr(name, data=dumped_JSON)
+                        indexJson[type] = name
+                    # TODO: change geometric-segmentation
+                    elif type == "geometric-segmentation":
+                        segmentation_id = input_data["segmentation_id"]
+                        time = input_data["time"]
+                        name = f"{type}_{segmentation_id}_{time}.json"
+                        dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
+                        zip_file.writestr(name, data=dumped_JSON)
+
+                        info: GeometricSegmentationFileInfo = {
+                            "segmentationId": segmentation_id,
+                            "timeframeIndex": time,
+                            "type": type,
+                        }
+                        if not "geometricSegmentations" in indexJson:
+                            indexJson["geometricSegmentations"] = {}
+
+                        indexJson["geometricSegmentations"][name] = info
+
+                dumped_index_JSON: str = json.dumps(indexJson, ensure_ascii=False, indent=4)
+                zip_file.writestr(INDEX_JSON_FILENAME, data=dumped_index_JSON)
+
+            # print(indexJson)
+            zip_data = file.getvalue()
+
+            with open(str(out_path.resolve()), "wb") as f:
+                f.write(zip_data)
+        case 'mvsx':
+            mvsx = _to_mvsx(responses, indexJson)
+            file = _write_file(out_path, format)
+        case _:
+            raise Exception(f'Format {format} is not supported.')

 def _get_timeframes_from_timeinfo(t: TimeInfo, segmentation_id: str):
     return list(range(t[segmentation_id]["start"], t[segmentation_id]["end"] + 1))
@@ -606,7 +631,7 @@ async def query(args: argparse.Namespace):
         r = await query.execute()
         responses.append(r)

-    _write_to_file(responses, parsed_args["out"])
+    _write_to_file(responses, parsed_args["out"], parsed_args["format"])


 async def main():
@@ -617,6 +642,8 @@
     # common_subparsers = main_parser.add_subparsers(title='Query type', dest='query_type', help='Select one of: ')
     # COMMON ARGUMENTS
     required_named = main_parser.add_argument_group("Required named arguments")
+    # TODO: check if choices should be a list instead
+    required_named.add_argument("--format", type=str, default='mvsx', choices=OutputFormatsT, required=True, help="Produce CVSX or mvsx file as an output")
     required_named.add_argument("--db_path", type=str, required=True, help="Path to db")
     # TODO: exclude extension
     required_named.add_argument(
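A note on the "# TODO: check if choices should be a list instead" added in this patch: argparse expects choices to be a concrete container of allowed values, so the OutputFormatsT Literal alias cannot be passed directly. One way to keep the CLI choices derived from the Literal is typing.get_args; the sketch below is illustrative only (the parser setup around it is not part of the patch, and the third patch in this series instead hard-codes ['mvsx', 'cvsx']):

import argparse
from typing import Literal, get_args

OutputFormatsT = Literal['mvsx', 'cvsx']

parser = argparse.ArgumentParser()
# get_args() turns the Literal alias into the tuple ('mvsx', 'cvsx'),
# keeping the CLI choices in sync with the type annotation.
parser.add_argument(
    "--format",
    type=str,
    choices=list(get_args(OutputFormatsT)),
    required=True,
    help="Produce a CVSX or MVSX file as output",
)
print(parser.parse_args(["--format", "mvsx"]).format)  # -> mvsx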
From 87e535932d5ce66ac9e86a0b4fb8758795408485 Mon Sep 17 00:00:00 2001
From: xrohac <167304803+xrohac@users.noreply.github.com>
Date: Sat, 9 Nov 2024 14:47:12 +0100
Subject: [PATCH 2/3] env update for m2 apple processor (#112)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Patrik Roháč
---
 environment.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yaml b/environment.yaml
index 9353b2fd..948cd076 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -41,7 +41,7 @@ dependencies:
   - sfftk==0.5.5.dev1
   - sfftk-rw==0.7.1
   - SimpleParse @ git+https://github.com/mcfletch/simpleparse.git@57c8d734bdc165581fbacfeecabe25a66c3452a4
-  - tensorstore==0.1.18
+  - tensorstore
   - killport
   - Pillow
   - typer==0.7.0
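For orientation before the next patch: the index.json written into the archive follows the CVSXFilesIndex and VolumeFileInfo definitions in vs_toolkit.py (filenames map to per-file info, while the annotations/metadata/query keys hold the names of the JSON files written alongside the volume data). A hypothetical instance for a single-channel volume query, expressed as a Python literal purely for illustration (filenames and identifiers are not taken from the patches):

# Hypothetical CVSX index.json payload, shaped after CVSXFilesIndex.
index_json = {
    "volumes": {
        "volume_em_0.bcif": {
            "channelId": "em",   # illustrative channel id
            "timeframeIndex": 0,
            "type": "volume",
        }
    },
    "annotations": "annotations.json",
    "metadata": "metadata.json",
    "query": "query.json",
}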
From ba734f6db9b8f5ef8598f8cf8c9375e35685c7b0 Mon Sep 17 00:00:00 2001
From: Aliaksei Chareshneu
Date: Tue, 19 Nov 2024 13:26:39 +0100
Subject: [PATCH 3/3] molviewspec support (WIP, unfinished, not tested);

---
 db/cellstar_db/models.py   |  10 +
 .../write_in_memory_zip.py |  20 +
 vs_toolkit/vs_toolkit.py   | 472 ++++++++++++------
 3 files changed, 346 insertions(+), 156 deletions(-)
 create mode 100644 preprocessor/cellstar_preprocessor/tools/write_in_memory_zip/write_in_memory_zip.py

diff --git a/db/cellstar_db/models.py b/db/cellstar_db/models.py
index 39038eb1..7996e78d 100644
--- a/db/cellstar_db/models.py
+++ b/db/cellstar_db/models.py
@@ -588,3 +588,13 @@ def detail_lvl_to_fraction(self) -> dict:
     mesh simplification ratios (fractions, e.g. 0.8) as values
     """
     ...
+
+
+class ModelArbitraryTypes(BaseModel):
+    class Config:
+        arbitrary_types_allowed = True
+
+
+class Asset(ModelArbitraryTypes):
+    filename: str
+    data: str | bytes

diff --git a/preprocessor/cellstar_preprocessor/tools/write_in_memory_zip/write_in_memory_zip.py b/preprocessor/cellstar_preprocessor/tools/write_in_memory_zip/write_in_memory_zip.py
new file mode 100644
index 00000000..97a8616b
--- /dev/null
+++ b/preprocessor/cellstar_preprocessor/tools/write_in_memory_zip/write_in_memory_zip.py
@@ -0,0 +1,20 @@
+import io
+from pathlib import Path
+import zipfile
+
+from cellstar_db.models import Asset
+
+def write_in_memory_zip(output_path: Path, assets: list[Asset]):
+    zip_buffer = io.BytesIO()
+
+
+    with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zip_file:
+        # for file_name, data in [('1.txt', io.BytesIO(b'111')),
+        #                         ('2.txt', io.BytesIO(b'222'))]:
+        for item in assets:
+            # zip_file.writestr(item.filename, data.getvalue())
+            zip_file.writestr(item.filename, item.data)
+
+    with open(str(output_path.resolve()), 'wb') as f:
+        f.write(zip_buffer.getvalue())
+    
\ No newline at end of file

diff --git a/vs_toolkit/vs_toolkit.py b/vs_toolkit/vs_toolkit.py
index 5526908e..bd867869 100644
--- a/vs_toolkit/vs_toolkit.py
+++ b/vs_toolkit/vs_toolkit.py
@@ -1,12 +1,23 @@
 import argparse
 import asyncio
+from enum import Enum
 import io
 import json
-from dataclasses import dataclass
 from pathlib import Path
+from cellstar_db.models import Asset, ModelArbitraryTypes
+from cellstar_preprocessor.tools.write_in_memory_zip.write_in_memory_zip import write_in_memory_zip
+from typing_extensions import Any, Literal, Optional, Protocol, TypedDict, Union
 from zipfile import ZIP_DEFLATED, ZipFile
-from molviewspec.builder import create_builder
+from molviewspec.builder import create_builder, Root
+from molviewspec.nodes import ParseFormatT
+# from ...mvs_volseg.molviewspec.molviewspec.builder import create_builder, Root
+# from ...mvs_volseg.molviewspec.molviewspec.nodes import ParseFormatT
+# TODO: below are the correct imports assuming that volseg MVS PR is merged
+# from molviewspec.builder import create_builder, Root
+# from molviewspec.nodes import ParseFormatT
+
+# TODO: refactor to separate files
+# app.py, models.py, query.py, helpers.py, other if needed

 from cellstar_db.file_system.db import FileSystemVolumeServerDB
 from cellstar_db.models import Metadata, TimeInfo
@@ -19,14 +30,17 @@
     get_volume_cell_query,
 )
 from cellstar_query.requests import MetadataRequest
+from pydantic import BaseModel, ConfigDict, Field

 DEFAULT_MAX_POINTS = 1000000000000
-INDEX_JSON_FILENAME = "index.json"
+CVSX_INDEX_JSON_FILENAME = "index.json"
 DEFAULT_MESH_DETAIL_LVL = 5
+MVSJ_INDEX_JSON_FILENAME = "index.mvsj"
+ResponseTypesWithJSONOutputT = Literal['annotations', 'metadata', 'query']
+ResponseTypesWithJSONOutputL = ['annotations', 'metadata', 'query']

-@dataclass
-class QueryResponse:
+class QueryResponse(BaseModel):
     # NOTE: list[tuple[str, bytes]] - list of tuples where str = segment_id, bytes - bcif
     # TODO: response is bytes or str or?
     response: Union[bytes, list[tuple[str, bytes]], str, dict]
@@ -39,21 +53,10 @@
         "metadata",
         "query",
     ]
+    # TODO: model for this?
     input_data: dict

-
-# key - file name, value -
-# data on each file (segmentation_id, timeframe_index, kind)
-# need a way to find files in array of tuples
-# at the frontend, in which each tuple is (filename, filedata)
-# can organize index json such that there are already categories
-# volumes, segmentations, etc.
-# key volume has value that is list of dicts
-# each dict has keys filename, segmentation_id, timeframe_index, kind, channel_id
-# etc. depending on what the file is
-
-
-class CVSXFileInfo(TypedDict):
+class CVSXFileInfo(BaseModel):
     type: Literal[
         "volume",
         "lattice",
@@ -89,7 +92,9 @@ class GeometricSegmentationFileInfo(SegmentationFileInfo):


 # careful with meshSegmentations and geometricSegmentations
-
+# Pydantic model does not work here, check why (some attr is with tuple or?)
+# IndexError: tuple index out of range
+# because of using [] to assign attrs apparently
 class CVSXFilesIndex(TypedDict):
     # file name to info mapping
     volumes: dict[str, VolumeFileInfo]
@@ -107,8 +112,25 @@ class CVSXFilesIndex(TypedDict):
     metadata: str
     query: str

+class VolumeAsset(ModelArbitraryTypes):
+    # data: io.TextIOWrapper
+    data: bytes
+    filename: str
+
+class JSONAsset(ModelArbitraryTypes):
+    data: str
+    filename: str

-class JsonQueryParams(TypedDict):
+# TODO: BaseModel here produces IndexError: tuple index out of range
+class MVSXAssets(TypedDict):
+    metadata_asset: JSONAsset
+    annotations_asset: JSONAsset
+    volume_assets: list[VolumeAsset]
+    query_asset: JSONAsset
+    cvsx_index: CVSXFilesIndex
+
+class JsonQueryParams(BaseModel):
     segmentation_kind: Optional[Literal["mesh", "lattice", "geometric-segmentation"]]
     entry_id: str
     source_db: str
@@ -119,13 +141,16 @@ class JsonQueryParams(TypedDict):
     detail_lvl: Optional[int]
     max_points: Optional[int]

-OutputFormatsT = Literal['mvsx', 'cvsx']

-class ParsedArgs(TypedDict):
+class OutputFormatsEnum(Enum):
+    mvsx = 'mvsx'
+    cvsx = 'cvsx'
+
+class ParsedArgs(BaseModel):
     db_path: Path
     out: Path
     json_params_path: Path
-    format: OutputFormatsT
+    format: OutputFormatsEnum

 def _parse_argparse_args(args: argparse.Namespace):
     # TODO: validate similar to query app

 def _parse_json_params(json_path: Path):
-    with open(json_path.resolve(), "r", encoding="utf-8") as f:
-        raw_json: JsonQueryParams = json.load(f)
+    return JsonQueryParams.parse_file(json_path)
+    # with open(json_path.resolve(), "r", encoding="utf-8") as f:
+    #     raw_json: JsonQueryParams = JsonQueryParams.parse_file.load(f)

-    return raw_json
+    # return raw_json


 # TODO: QueryResponse
 class QueryTaskBase(Protocol):
     async def execute(self) -> QueryResponse: ...

-class QueryTaskParams(TypedDict):
+class QueryTaskParams(ModelArbitraryTypes):
     # parsed_args:
     volume_server: VolumeServerService
     # custom_params: Optional[QuerySpecificParams]
@@ -304,135 +330,269 @@ def _get_volume_timeframes_from_metadata(grid_metadata: Metadata):
     return list(range(start, end + 1))


-def _to_mvsx(responses: list[QueryResponse], indexJson: CVSXFilesIndex):
-    """
-    Only works for a volume query
-    """
-    builder = create_builder()
-    # TODO: replacement for download?
-    (
-        # TODO: local url in MVSX archive
-        builder.download(url='https://raw.githack.com/molstar/molstar-volseg/master/test-data/preprocessor/sample_volumes/emdb/EMD-1832.map')
-        .parse(format='map')
-        .raw_volume(source='map')
-        .volume_representation(type="isosurface")
-        .color(color='aqua')
-        # .color_from_uri(schema='volume', uri='./_examples/_input/volseg_example_annotations.json', format='json')
-        # .transparency(transparency=0.4)
-    )
+def _json_response_to_asset(r: QueryResponse, indexJson: CVSXFilesIndex):
+    type = r.type
+    response = r.response
+    assert type in ResponseTypesWithJSONOutputL, f'Response type {type} is not supported by this function'
+
+    # name should be created based on type and input data
+    name = f"{type}.json"
+    indexJson[type] = name
+    dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
+    asset = JSONAsset(
+        data=dumped_JSON,
+        filename=name
+    )
+    return asset, indexJson
+
+def _volume_response_to_asset(r: QueryResponse, indexJson: CVSXFilesIndex):
+    t = r.type
+    response = r.response
+    input_data = r.input_data
+    assert t == 'volume', f'Response type {t} is not supported by this function'
+
+    # name should be created based on type and input data
+    channel_id = input_data["channel_id"]
+    time = input_data["time"]
+    name = f"{t}_{channel_id}_{time}.bcif"
+    asset = VolumeAsset(
+        data=response,
+        filename=name
+    )
+
+    info: VolumeFileInfo = {
+        "channelId": channel_id,
+        "timeframeIndex": time,
+        "type": t,
+    }
+    if not "volumes" in indexJson:
+        indexJson["volumes"] = {}
+    indexJson["volumes"][name] = info
+
+    return asset, indexJson
+
+def _create_assets(responses: list[QueryResponse], indexJson: CVSXFilesIndex):
+    volume_assets: list[VolumeAsset] = []
+    for r in responses:
+        # TODO: use metadata for segmentation node
+        type = r.type
+        match type:
+            case 'volume':
+                volume_asset, indexJson = _volume_response_to_asset(r, indexJson)
+                volume_assets.append(volume_asset)
+            case 'metadata':
+                metadata_asset, indexJson = _json_response_to_asset(r, indexJson)
+            case 'annotations':
+                annotations_asset, indexJson = _json_response_to_asset(r, indexJson)
+            case 'query':
+                query_asset, indexJson = _json_response_to_asset(r, indexJson)
+            case _:
+                raise Exception('Response type: ' + type + ' is not supported yet.')
+
+    msvx_assets = MVSXAssets(
+        volume_assets=volume_assets,
+        cvsx_index=indexJson,
+        metadata_asset=metadata_asset,
+        annotations_asset=annotations_asset,
+        query_asset=query_asset
+    )
+    return msvx_assets
+
+def _create_mvsj_tree_builder(assets: MVSXAssets, assets_folder_name: str):
+    builder = create_builder()
+    if assets['volume_assets'] is not None:
+        volume_assets = assets['volume_assets']
+        for va in volume_assets:
+            (
+                builder.download(url=f'./{assets_folder_name}/{va.filename}')
+                # TODO: new format
+                .parse(format='vs-density')
+                # TODO: new node + new params
+                .vs_volume()
+                # TODO: new parent for volume_representation and related functionality
+                .volume_representation(type="isosurface")
+                .color(color='aqua')
+            )
+    # TODO: segmentations, geometric etc.
+
+    return builder
+
+
+def _create_index_mvsj(assets: MVSXAssets, assets_folder_name: str):
+    builder = _create_mvsj_tree_builder(assets, assets_folder_name)
+    return builder.get_state()
+
+
+
+def _create_mvsx(mvsx_assets: MVSXAssets, index_mvsj_json_str: str, out_path: Path, assets_folder_name: str):
+    # file = io.BytesIO()
+    # with ZipFile(file, "w", ZIP_DEFLATED) as zip_file:
+    #     dumped_cvsx_index_JSON: str = json.dumps(mvsx_assets['cvsx_index'], ensure_ascii=False, indent=4)
+    #     # TODO: check if works
+    #     # TODO: other assets
+    #     if mvsx_assets['volume_assets'] is not None:
+    #         for va in mvsx_assets['volume_assets']:
+    #             zip_file.writestr(f'{assets_folder_name}/{va.filename}', data=va.data)
+
+    #     zip_file.writestr(f'{assets_folder_name}/{CVSX_INDEX_JSON_FILENAME}', data=dumped_cvsx_index_JSON)
+    #     zip_file.writestr(MVSJ_INDEX_JSON_FILENAME, data=index_mvsj_json_str)
+
+    #     zip_data = file.getvalue()

+    assert out_path.suffix == '.mvsx'
+
+    # with open(str(out_path.resolve()), "wb") as f:
+    #     f.write(zip_data)
+    assets: list[Asset] = []
+
+    # CVSX index
+    cvsx_index = Asset(
+        filename=f'{assets_folder_name}/{CVSX_INDEX_JSON_FILENAME}',
+        data=json.dumps(mvsx_assets['cvsx_index'], ensure_ascii=False, indent=4)
+    )
+    assets.append(cvsx_index)
+
+    # MVSJ index
+    mvsj_index = Asset(
+        filename=MVSJ_INDEX_JSON_FILENAME,
+        data=index_mvsj_json_str
+    )
+    assets.append(mvsj_index)
+
+    # Volume data
+    if mvsx_assets['volume_assets'] is not None:
+        for va in mvsx_assets['volume_assets']:
+            assets.append(
+                Asset(
+                    # TODO: check if works
+                    filename=f'{assets_folder_name}/{va.filename}',
+                    data=va.data
+                )
+            )
+
+
+
+    write_in_memory_zip(output_path=out_path, assets=assets)
+
+def _write_mvsx_to_file(responses: list[QueryResponse], indexJson: CVSXFilesIndex, out_path: Path, assets_folder_name: str):
+    """
+    Only works for a volume query
+    """
+    assets = _create_assets(responses, indexJson)
+    index_mvsj_json_str = _create_index_mvsj(assets, assets_folder_name)
+    mvsx_file = _create_mvsx(assets, index_mvsj_json_str, out_path, assets_folder_name)
+    return mvsx_file
+
+# def _write_mvsx_to_file(file: io.BytesIO, responses: list[QueryResponse], indexJson: CVSXFilesIndex, out_path: Path):
+    # _to_mvsx(responses, indexJson, out_path, 'assets')
+
+def _write_cvsx_to_file(file: io.BytesIO, responses: list[QueryResponse], indexJson: CVSXFilesIndex, out_path: Path):
+    with ZipFile(file, "w", ZIP_DEFLATED) as zip_file:
+        for r in responses:
+            response = r.response
+            type = r.type
+            input_data = r.input_data
+
+            if type == "volume":
+                # name should be created based on type and input data
+                channel_id = input_data["channel_id"]
+                time = input_data["time"]
+                name = f"{type}_{channel_id}_{time}.bcif"
+                zip_file.writestr(name, response)
+                info: VolumeFileInfo = {
+                    "channelId": channel_id,
+                    "timeframeIndex": time,
+                    "type": type,
+                }
+                if not "volumes" in indexJson:
+                    indexJson["volumes"] = {}
+
+                indexJson["volumes"][name] = info
+
+            elif type == "lattice":
+                segmentation_id = input_data["segmentation_id"]
+                time = input_data["time"]
+                name = f"{type}_{segmentation_id}_{time}.bcif"
+
+                info: LatticeSegmentationFileInfo = {
+                    "timeframeIndex": time,
+                    "type": type,
+                    "segmentationId": segmentation_id,
+                }
+                if not "latticeSegmentations" in indexJson:
+                    indexJson["latticeSegmentations"] = {}
+
+                indexJson["latticeSegmentations"][name] = info
+
+                zip_file.writestr(name, response)
+            elif type == "mesh":
+                # how to include segmentation id here?
+                segmentation_id = input_data["segmentation_id"]
+                time = input_data["time"]
+                meshes: list[str, bytes] = response
+                filenames = []
+                for segment_id, content in meshes:
+                    filename = f"{type}_{segment_id}_{segmentation_id}_{time}.bcif"
+                    filenames.append(filename)
+                    zip_file.writestr(filename, content)
+
+                info: MeshSegmentationFilesInfo = {
+                    "segmentationId": segmentation_id,
+                    "timeframeIndex": time,
+                    "segmentsFilenames": filenames,
+                    "type": type,
+                }
+
+                if not "meshSegmentations" in indexJson:
+                    indexJson["meshSegmentations"] = []
+
+                indexJson["meshSegmentations"].append(info)
+
+            elif type == "annotations" or type == "metadata" or type == "query":
+                name = f"{type}.json"
+                dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
+                zip_file.writestr(name, data=dumped_JSON)
+                indexJson[type] = name
+            elif type == "geometric-segmentation":
+                segmentation_id = input_data["segmentation_id"]
+                time = input_data["time"]
+                name = f"{type}_{segmentation_id}_{time}.json"
+                dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
+                zip_file.writestr(name, data=dumped_JSON)
+
+                info: GeometricSegmentationFileInfo = {
+                    "segmentationId": segmentation_id,
+                    "timeframeIndex": time,
+                    "type": type,
+                }
+                if not "geometricSegmentations" in indexJson:
+                    indexJson["geometricSegmentations"] = {}
+
+                indexJson["geometricSegmentations"][name] = info
+
+        dumped_index_JSON: str = json.dumps(indexJson, ensure_ascii=False, indent=4)
+        zip_file.writestr(CVSX_INDEX_JSON_FILENAME, data=dumped_index_JSON)
+
+    # print(indexJson)
+    zip_data = file.getvalue()
+
+    with open(str(out_path.resolve()), "wb") as f:
+        f.write(zip_data)
+
-def _write_to_file(responses: list[QueryResponse], out_path: Path, format: OutputFormatsT):
+def _write_to_file(responses: list[QueryResponse], out_path: Path, format: OutputFormatsEnum):
     file = io.BytesIO()
     indexJson: CVSXFilesIndex = {
-        # 'volumes': {},
-        # 'latticeSegmentations': {},
-        # 'meshSegmentations': [],
         "metadata": None,
-        # 'annotations': None,
-        # 'geometricSegmentations': [],
         "query": None,
     }
-
+    assets_folder_name = 'assets'
     match format:
-        case 'cvsx':
-            with ZipFile(file, "w", ZIP_DEFLATED) as zip_file:
-                for r in responses:
-                    response = r.response
-                    type = r.type
-                    input_data = r.input_data
-
-                    if type == "volume":
-                        # name should be created based on type and input data
-                        channel_id = input_data["channel_id"]
-                        time = input_data["time"]
-                        name = f"{type}_{channel_id}_{time}.bcif"
-                        zip_file.writestr(name, response)
-                        info: VolumeFileInfo = {
-                            "channelId": channel_id,
-                            "timeframeIndex": time,
-                            "type": type,
-                        }
-                        if not "volumes" in indexJson:
-                            indexJson["volumes"] = {}
-
-                        indexJson["volumes"][name] = info
-
-                    elif type == "lattice":
-                        segmentation_id = input_data["segmentation_id"]
-                        time = input_data["time"]
-                        name = f"{type}_{segmentation_id}_{time}.bcif"
-
-                        info: LatticeSegmentationFileInfo = {
-                            "timeframeIndex": time,
-                            "type": type,
-                            "segmentationId": segmentation_id,
-                        }
-                        if not "latticeSegmentations" in indexJson:
-                            indexJson["latticeSegmentations"] = {}
-
-                        indexJson["latticeSegmentations"][name] = info
-
-                        zip_file.writestr(name, response)
-                    elif type == "mesh":
-                        # how to include segmentation id here?
-                        segmentation_id = input_data["segmentation_id"]
-                        time = input_data["time"]
-                        meshes: list[str, bytes] = response
-                        filenames = []
-                        for segment_id, content in meshes:
-                            filename = f"{type}_{segment_id}_{segmentation_id}_{time}.bcif"
-                            filenames.append(filename)
-                            zip_file.writestr(filename, content)
-
-                        info: MeshSegmentationFilesInfo = {
-                            "segmentationId": segmentation_id,
-                            "timeframeIndex": time,
-                            "segmentsFilenames": filenames,
-                            "type": type,
-                        }
-
-                        if not "meshSegmentations" in indexJson:
-                            indexJson["meshSegmentations"] = []
-
-                        indexJson["meshSegmentations"].append(info)
-
-                    elif type == "annotations" or type == "metadata" or type == "query":
-                        name = f"{type}.json"
-                        dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
-                        zip_file.writestr(name, data=dumped_JSON)
-                        indexJson[type] = name
-                    # TODO: change geometric-segmentation
-                    elif type == "geometric-segmentation":
-                        segmentation_id = input_data["segmentation_id"]
-                        time = input_data["time"]
-                        name = f"{type}_{segmentation_id}_{time}.json"
-                        dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
-                        zip_file.writestr(name, data=dumped_JSON)
-
-                        info: GeometricSegmentationFileInfo = {
-                            "segmentationId": segmentation_id,
-                            "timeframeIndex": time,
-                            "type": type,
-                        }
-                        if not "geometricSegmentations" in indexJson:
-                            indexJson["geometricSegmentations"] = {}
-
-                        indexJson["geometricSegmentations"][name] = info
-
-                dumped_index_JSON: str = json.dumps(indexJson, ensure_ascii=False, indent=4)
-                zip_file.writestr(INDEX_JSON_FILENAME, data=dumped_index_JSON)
-
-            # print(indexJson)
-            zip_data = file.getvalue()
-
-            with open(str(out_path.resolve()), "wb") as f:
-                f.write(zip_data)
-        case 'mvsx':
-            mvsx = _to_mvsx(responses, indexJson)
-            file = _write_file(out_path, format)
+        case OutputFormatsEnum.cvsx:
+            _write_cvsx_to_file(file=file, responses=responses, indexJson=indexJson, out_path=out_path)
+        case OutputFormatsEnum.mvsx:
+            _write_mvsx_to_file(file=file, responses=responses, indexJson=indexJson, out_path=out_path, assets_folder_name=assets_folder_name)
         case _:
             raise Exception(f'Format {format} is not supported.')

 def _get_timeframes_from_timeinfo(t: TimeInfo, segmentation_id: str):
     return list(range(t[segmentation_id]["start"], t[segmentation_id]["end"] + 1))
@@ -541,12 +701,12 @@ async def query(args: argparse.Namespace):
     # 1. Parse argparse args
     parsed_args = _parse_argparse_args(args)
     # 2. Parse json params
-    parsed_params = _parse_json_params(parsed_args["json_params_path"])
+    parsed_params = _parse_json_params(parsed_args.json_params_path)

-    entry_id = parsed_params["entry_id"]
-    source_db = parsed_params["source_db"]
+    entry_id = parsed_params.entry_id
+    source_db = parsed_params.source_db

-    db = FileSystemVolumeServerDB(folder=Path(parsed_args["db_path"]))
+    db = FileSystemVolumeServerDB(folder=Path(parsed_args.db_path))

     # initialize server
     volume_server = VolumeServerService(db)
@@ -554,7 +714,7 @@
     # 3. query metadata
     metadata = await volume_server.get_metadata(
         req=MetadataRequest(
-            source=parsed_params["source_db"], structure_id=parsed_params["entry_id"]
+            source=parsed_params.source_db, structure_id=parsed_params.entry_id
         )
     )
     grid_metadata: Metadata = metadata["grid"]
@@ -631,7 +791,7 @@ async def query(args: argparse.Namespace):
         r = await query.execute()
         responses.append(r)

-    _write_to_file(responses, parsed_args["out"], parsed_args["format"])
+    _write_to_file(responses, parsed_args.out, parsed_args.format)


 async def main():
@@ -641,16 +801,16 @@
     # common_subparsers = main_parser.add_subparsers(title='Query type', dest='query_type', help='Select one of: ')
     # COMMON ARGUMENTS
+    # TODO: check if extension and format are in agreement (better) or exclude extension (worse)
     required_named = main_parser.add_argument_group("Required named arguments")
     # TODO: check if choices should be a list instead
-    required_named.add_argument("--format", type=str, default='mvsx', choices=OutputFormatsT, required=True, help="Produce CVSX or mvsx file as an output")
+    required_named.add_argument("--format", type=str, default='mvsx', choices=['mvsx', 'cvsx'], required=True, help="Produce CVSX or mvsx file as an output")
     required_named.add_argument("--db_path", type=str, required=True, help="Path to db")
-    # TODO: exclude extension
     required_named.add_argument(
         "--out", type=str, required=True, help="Path to output file including extension"
     )
     required_named.add_argument(
-        "--json-params-path",
+        "--json_params_path",
         required=True,
         type=str,
         help="Path to .json file with query parameters",
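To close, a minimal usage sketch of the Asset / write_in_memory_zip pair introduced in this third patch, assembling an MVSX-style archive by hand. Everything below is illustrative: the file names and payloads are placeholders, and the MVSJ content is left as an opaque string standing in for a serialized state from builder.get_state().

from pathlib import Path

from cellstar_db.models import Asset
from cellstar_preprocessor.tools.write_in_memory_zip.write_in_memory_zip import (
    write_in_memory_zip,
)

# Mirrors what _create_mvsx assembles: an index.mvsj at the archive root plus
# binary assets under an assets/ folder. Both payloads are placeholders.
assets = [
    Asset(filename="index.mvsj", data="{}"),
    Asset(filename="assets/volume_em_0.bcif", data=b"placeholder-bcif-bytes"),
]
write_in_memory_zip(output_path=Path("example.mvsx"), assets=assets)

With the patch applied, the same archive would normally be produced end to end by the CLI, roughly: python vs_toolkit/vs_toolkit.py --db_path <db folder> --format mvsx --out example.mvsx --json_params_path params.json (paths and the params file are placeholders; the exact invocation depends on the existing main() wiring).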