diff --git a/.gitignore b/.gitignore
index d9cb24a2..1901fb33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 ### Custom
+working_folder/*
+preprocessor/cellstar_preprocessor/tools/open_wrl_file/*
 molstar-extension/site/*
 molstar-extension/node_modules/
 molstar-extension/debug.log
@@ -317,3 +319,4 @@ dmypy.json
 # Cython debug symbols
 cython_debug/
+working_folder/custom-actin/.zattrs
diff --git a/db/cellstar_db/models.py b/db/cellstar_db/models.py
index 39038eb1..7996e78d 100644
--- a/db/cellstar_db/models.py
+++ b/db/cellstar_db/models.py
@@ -588,3 +588,14 @@ def detail_lvl_to_fraction(self) -> dict:
     mesh simplification ratios (fractions, e.g. 0.8) as values
     """
     ...
+
+
+class ModelArbitraryTypes(BaseModel):
+    class Config:
+        arbitrary_types_allowed = True
+
+
+class Asset(ModelArbitraryTypes):
+    # a named file (text or binary) destined for an in-memory zip archive
+    filename: str
+    data: str | bytes
diff --git a/environment.yaml b/environment.yaml
index 9353b2fd..948cd076 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -41,7 +41,7 @@ dependencies:
   - sfftk==0.5.5.dev1
   - sfftk-rw==0.7.1
   - SimpleParse @ git+https://github.com/mcfletch/simpleparse.git@57c8d734bdc165581fbacfeecabe25a66c3452a4
-  - tensorstore==0.1.18
+  - tensorstore
   - killport
   - Pillow
   - typer==0.7.0
diff --git a/preprocessor/cellstar_preprocessor/tools/write_in_memory_zip/write_in_memory_zip.py b/preprocessor/cellstar_preprocessor/tools/write_in_memory_zip/write_in_memory_zip.py
new file mode 100644
index 00000000..97a8616b
--- /dev/null
+++ b/preprocessor/cellstar_preprocessor/tools/write_in_memory_zip/write_in_memory_zip.py
@@ -0,0 +1,17 @@
+import io
+import zipfile
+from pathlib import Path
+
+from cellstar_db.models import Asset
+
+
+def write_in_memory_zip(output_path: Path, assets: list[Asset]):
+    """Assemble a zip archive in memory from the given assets and write it to output_path."""
+    zip_buffer = io.BytesIO()
+
+    with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zip_file:
+        for item in assets:
+            zip_file.writestr(item.filename, item.data)
+
+    with open(str(output_path.resolve()), "wb") as f:
+        f.write(zip_buffer.getvalue())
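# Usage sketch for write_in_memory_zip (illustrative, not part of the patch;
# file names and payloads below are made up). Asset.data accepts str or bytes,
# so JSON indices and binary BCIF payloads can share one archive:
from pathlib import Path

from cellstar_db.models import Asset
from cellstar_preprocessor.tools.write_in_memory_zip.write_in_memory_zip import write_in_memory_zip

assets = [
    Asset(filename="index.json", data='{"volumes": {}}'),        # text asset
    Asset(filename="assets/volume_0_0.bcif", data=b"\x00\x01"),  # binary asset
]
write_in_memory_zip(output_path=Path("example.zip"), assets=assets)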
diff --git a/vs_toolkit/extra_models.py b/vs_toolkit/extra_models.py
new file mode 100644
index 00000000..0ac08859
--- /dev/null
+++ b/vs_toolkit/extra_models.py
@@ -0,0 +1,6 @@
+import molviewspec
+
+
+# TODO: placeholder for a model describing .mvsj (MolViewSpec JSON) states
+class MVSJ:
+    pass
diff --git a/vs_toolkit/vs_toolkit.py b/vs_toolkit/vs_toolkit.py
index 211b1321..bd867869 100644
--- a/vs_toolkit/vs_toolkit.py
+++ b/vs_toolkit/vs_toolkit.py
@@ -1,11 +1,23 @@
 import argparse
 import asyncio
+from enum import Enum
 import io
 import json
-from dataclasses import dataclass
 from pathlib import Path
-from typing import Literal, Optional, Protocol, TypedDict, Union
+from cellstar_db.models import Asset, ModelArbitraryTypes
+from cellstar_preprocessor.tools.write_in_memory_zip.write_in_memory_zip import write_in_memory_zip
+from typing_extensions import Any, Literal, Optional, Protocol, TypedDict, Union
 from zipfile import ZIP_DEFLATED, ZipFile
+# NOTE: these imports assume that the volseg MVS PR is merged into molviewspec
+from molviewspec.builder import create_builder, Root
+from molviewspec.nodes import ParseFormatT
+
+# TODO: refactor into separate files:
+# app.py, models.py, query.py, helpers.py, others if needed
 
 from cellstar_db.file_system.db import FileSystemVolumeServerDB
 from cellstar_db.models import Metadata, TimeInfo
@@ -18,14 +30,17 @@
     get_volume_cell_query,
 )
 from cellstar_query.requests import MetadataRequest
+from pydantic import BaseModel, ConfigDict, Field
 
 DEFAULT_MAX_POINTS = 1000000000000
-INDEX_JSON_FILENAME = "index.json"
+CVSX_INDEX_JSON_FILENAME = "index.json"
 DEFAULT_MESH_DETAIL_LVL = 5
+MVSJ_INDEX_JSON_FILENAME = "index.mvsj"
+ResponseTypesWithJSONOutputT = Literal['annotations', 'metadata', 'query']
+ResponseTypesWithJSONOutputL = ['annotations', 'metadata', 'query']
 
 
-@dataclass
-class QueryResponse:
+class QueryResponse(BaseModel):
     # NOTE: list[tuple[str, bytes]] - list of tuples where str = segment_id, bytes - bcif
     # TODO: response is bytes or str or?
     response: Union[bytes, list[tuple[str, bytes]], str, dict]
@@ -38,21 +53,10 @@
         "metadata",
         "query",
     ]
+    # TODO: model for this?
     input_data: dict
 
-
-# key - file name, value -
-# data on each file (segmentation_id, timeframe_index, kind)
-# need a way to find files in array of tuples
-# at the frontend, in which each tuple is (filename, filedata)
-# can organize index json such that there are already categories
-# volumes, segmentations, etc.
-# key volume has value that is list of dicts
-# each dict has keys filename, segmentation_id, timeframe_index, kind, channel_id
-# etc. depending on what the file is
-
-
-class CVSXFileInfo(TypedDict):
+class CVSXFileInfo(BaseModel):
     type: Literal[
         "volume",
         "lattice",
@@ -88,7 +92,9 @@ class GeometricSegmentationFileInfo(SegmentationFileInfo):
 
 # careful with meshSegmentations and geometricSegmentations
-
+# NOTE: a Pydantic model does not work for the index classes below: entries are
+# assigned via [], e.g. indexJson["volumes"][name] = info, and item assignment
+# on a BaseModel raises "IndexError: tuple index out of range"
 class CVSXFilesIndex(TypedDict):
     # file name to info mapping
     volumes: dict[str, VolumeFileInfo]
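# Why the index classes stay TypedDicts (illustrative, not part of the patch):
# a TypedDict is a plain dict at runtime, so item assignment works, whereas a
# pydantic BaseModel instance only supports attribute access.
from typing_extensions import TypedDict


class ExampleIndex(TypedDict, total=False):
    volumes: dict[str, dict]


index: ExampleIndex = {}
index["volumes"] = {}  # fine: a TypedDict instance is just a dict
# the BaseModel equivalent would have to be `model.volumes = {}`;
# `model["volumes"] = {}` is not supported on BaseModel instances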
@@ -106,8 +112,25 @@ class CVSXFilesIndex(TypedDict):
     metadata: str
     query: str
 
+class VolumeAsset(ModelArbitraryTypes):
+    data: bytes
+    filename: str
 
-class JsonQueryParams(TypedDict):
+class JSONAsset(ModelArbitraryTypes):
+    data: str
+    filename: str
+
+
+# TODO: a BaseModel here produces "IndexError: tuple index out of range" (see the note above)
+class MVSXAssets(TypedDict):
+    metadata_asset: JSONAsset
+    annotations_asset: JSONAsset
+    volume_assets: list[VolumeAsset]
+    query_asset: JSONAsset
+    cvsx_index: CVSXFilesIndex
+
+class JsonQueryParams(BaseModel):
     segmentation_kind: Optional[Literal["mesh", "lattice", "geometric-segmentation"]]
     entry_id: str
     source_db: str
@@ -119,11 +142,15 @@ class JsonQueryParams(BaseModel):
     max_points: Optional[int]
 
 
-class ParsedArgs(TypedDict):
+class OutputFormatsEnum(Enum):
+    mvsx = 'mvsx'
+    cvsx = 'cvsx'
+
+
+class ParsedArgs(BaseModel):
     db_path: Path
     out: Path
     json_params_path: Path
-
+    format: OutputFormatsEnum
 
 def _parse_argparse_args(args: argparse.Namespace):
     # TODO: validate similar to query app
@@ -131,14 +158,16 @@ def _parse_argparse_args(args: argparse.Namespace):
     return ParsedArgs(
         db_path=Path(args.db_path),
         out=Path(args.out),
         json_params_path=Path(args.json_params_path),
+        # convert the raw CLI string into the enum; invalid values raise ValueError
+        format=OutputFormatsEnum(args.format),
     )
 
 
 def _parse_json_params(json_path: Path):
-    with open(json_path.resolve(), "r", encoding="utf-8") as f:
-        raw_json: JsonQueryParams = json.load(f)
-
-    return raw_json
+    # parse and validate the query params JSON directly into the pydantic model
+    return JsonQueryParams.parse_file(json_path)
 
 
 # TODO: QueryResponse
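# Shape of the params file consumed by _parse_json_params (illustrative, not
# part of the patch; values are made up and only fields shown above are
# included — the model may require more):
#
#   params.json:
#   {
#       "entry_id": "emd-1832",
#       "source_db": "emdb",
#       "segmentation_kind": "lattice",
#       "max_points": 1000000
#   }
#
# JsonQueryParams.parse_file("params.json") then returns a validated model, so
# downstream code uses attribute access (params.entry_id) instead of dict lookups.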
@@ -146,7 +175,7 @@
 class QueryTaskBase(Protocol):
     async def execute(self) -> QueryResponse: ...
 
 
-class QueryTaskParams(TypedDict):
+class QueryTaskParams(ModelArbitraryTypes):
     # parsed_args:
     volume_server: VolumeServerService
     # custom_params: Optional[QuerySpecificParams]
@@ -301,22 +330,164 @@ def _get_volume_timeframes_from_metadata(grid_metadata: Metadata):
     return list(range(start, end + 1))
 
-
-def _write_to_file(responses: list[QueryResponse], out_path: Path):
-    # TODO: add here index.json with data on each file
-
-    # should be similar to create in memory zip
-    file = io.BytesIO()
-
-    indexJson: CVSXFilesIndex = {
-        # 'volumes': {},
-        # 'latticeSegmentations': {},
-        # 'meshSegmentations': [],
-        "metadata": None,
-        # 'annotations': None,
-        # 'geometricSegmentations': [],
-        "query": None,
-    }
+
+def _json_response_to_asset(r: QueryResponse, indexJson: CVSXFilesIndex):
+    response_type = r.type
+    response = r.response
+    assert response_type in ResponseTypesWithJSONOutputL, f'Response type {response_type} is not supported by this function'
+
+    # the file name is derived from the response type
+    name = f"{response_type}.json"
+    indexJson[response_type] = name
+    dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
+    asset = JSONAsset(
+        data=dumped_JSON,
+        filename=name
+    )
+    return asset, indexJson
+
+
+def _volume_response_to_asset(r: QueryResponse, indexJson: CVSXFilesIndex):
+    t = r.type
+    response = r.response
+    input_data = r.input_data
+    assert t == 'volume', f'Response type {t} is not supported by this function'
+
+    # the file name is derived from the response type and input data
+    channel_id = input_data["channel_id"]
+    time = input_data["time"]
+    name = f"{t}_{channel_id}_{time}.bcif"
+    asset = VolumeAsset(
+        data=response,
+        filename=name
+    )
+
+    info: VolumeFileInfo = {
+        "channelId": channel_id,
+        "timeframeIndex": time,
+        "type": t,
+    }
+    if "volumes" not in indexJson:
+        indexJson["volumes"] = {}
+
+    indexJson["volumes"][name] = info
+
+    return asset, indexJson
+
+
+def _create_assets(responses: list[QueryResponse], indexJson: CVSXFilesIndex):
+    volume_assets: list[VolumeAsset] = []
+    for r in responses:
+        # TODO: use metadata for segmentation node
+        match r.type:
+            case 'volume':
+                volume_asset, indexJson = _volume_response_to_asset(r, indexJson)
+                volume_assets.append(volume_asset)
+            case 'metadata':
+                metadata_asset, indexJson = _json_response_to_asset(r, indexJson)
+            case 'annotations':
+                annotations_asset, indexJson = _json_response_to_asset(r, indexJson)
+            case 'query':
+                query_asset, indexJson = _json_response_to_asset(r, indexJson)
+            case _:
+                raise Exception(f'Response type {r.type} is not supported yet.')
+
+    mvsx_assets = MVSXAssets(
+        volume_assets=volume_assets,
+        cvsx_index=indexJson,
+        metadata_asset=metadata_asset,
+        annotations_asset=annotations_asset,
+        query_asset=query_asset
+    )
+    return mvsx_assets
+
+
+def _create_mvsj_tree_builder(assets: MVSXAssets, assets_folder_name: str):
+    builder = create_builder()
+    if assets['volume_assets'] is not None:
+        for va in assets['volume_assets']:
+            (
+                builder.download(url=f'./{assets_folder_name}/{va.filename}')
+                # TODO: new format
+                .parse(format='vs-density')
+                # TODO: new node + new params
+                .vs_volume()
+                # TODO: new parent for volume_representation and related functionality
+                .volume_representation(type="isosurface")
+                .color(color='aqua')
+            )
+    # TODO: segmentations, geometric segmentations, etc.
+
+    return builder
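# For orientation (illustrative, not part of the patch): the chain above
# mirrors the stock molviewspec builder pattern for structures, roughly:
#
#   builder = create_builder()
#   (
#       builder.download(url="https://files.wwpdb.org/download/1cbs.cif")
#       .parse(format="mmcif")
#       .model_structure()
#       .component()
#       .representation(type="cartoon")
#       .color(color="blue")
#   )
#   state = builder.get_state()
#
# The 'vs-density' format and the vs_volume()/volume_representation() nodes do
# not exist in stock molviewspec; they assume the volseg MVS PR is merged.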
+
+
+def _create_index_mvsj(assets: MVSXAssets, assets_folder_name: str):
+    builder = _create_mvsj_tree_builder(assets, assets_folder_name)
+    return builder.get_state()
+
+
+def _create_mvsx(mvsx_assets: MVSXAssets, index_mvsj_json_str: str, out_path: Path, assets_folder_name: str):
+    assert out_path.suffix == '.mvsx', f'Output path {out_path} should have the .mvsx extension'
+
+    assets: list[Asset] = []
+
+    # CVSX index
+    assets.append(Asset(
+        filename=f'{assets_folder_name}/{CVSX_INDEX_JSON_FILENAME}',
+        data=json.dumps(mvsx_assets['cvsx_index'], ensure_ascii=False, indent=4)
+    ))
+
+    # MVSJ index goes to the archive root
+    assets.append(Asset(
+        filename=MVSJ_INDEX_JSON_FILENAME,
+        data=index_mvsj_json_str
+    ))
+
+    # volume data
+    if mvsx_assets['volume_assets'] is not None:
+        for va in mvsx_assets['volume_assets']:
+            assets.append(Asset(
+                filename=f'{assets_folder_name}/{va.filename}',
+                data=va.data
+            ))
+
+    write_in_memory_zip(output_path=out_path, assets=assets)
+
+
+def _write_mvsx_to_file(responses: list[QueryResponse], indexJson: CVSXFilesIndex, out_path: Path, assets_folder_name: str):
+    """
+    Only works for a volume query
+    """
+    assets = _create_assets(responses, indexJson)
+    index_mvsj_json_str = _create_index_mvsj(assets, assets_folder_name)
+    _create_mvsx(assets, index_mvsj_json_str, out_path, assets_folder_name)
+
+
+def _write_cvsx_to_file(file: io.BytesIO, responses: list[QueryResponse], indexJson: CVSXFilesIndex, out_path: Path):
     with ZipFile(file, "w", ZIP_DEFLATED) as zip_file:
         for r in responses:
             response = r.response
@@ -383,7 +554,6 @@ def _write_to_file(responses: list[QueryResponse], out_path: Path):
                 dumped_JSON: str = json.dumps(response, ensure_ascii=False, indent=4)
                 zip_file.writestr(name, data=dumped_JSON)
                 indexJson[type] = name
-            # TODO: change geometric-segmentation
             elif type == "geometric-segmentation":
                 segmentation_id = input_data["segmentation_id"]
                 time = input_data["time"]
@@ -402,7 +572,7 @@ def _write_to_file(responses: list[QueryResponse], out_path: Path):
             indexJson["geometricSegmentations"][name] = info
 
         dumped_index_JSON: str = json.dumps(indexJson, ensure_ascii=False, indent=4)
-        zip_file.writestr(INDEX_JSON_FILENAME, data=dumped_index_JSON)
+        zip_file.writestr(CVSX_INDEX_JSON_FILENAME, data=dumped_index_JSON)
         # print(indexJson)
 
     zip_data = file.getvalue()
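# Resulting .mvsx layout (derived from the code above; asset names vary with
# the query, the volume file name pattern comes from _volume_response_to_asset):
#
#   index.mvsj                        <- MolViewSpec state referencing ./assets/*
#   assets/index.json                 <- CVSX index describing the asset files
#   assets/volume_<channel_id>_<time>.bcif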
@@ -410,6 +580,21 @@ def _write_to_file(responses: list[QueryResponse], out_path: Path):
     with open(str(out_path.resolve()), "wb") as f:
         f.write(zip_data)
 
+
+def _write_to_file(responses: list[QueryResponse], out_path: Path, format: OutputFormatsEnum):
+    indexJson: CVSXFilesIndex = {
+        "metadata": None,
+        "query": None,
+    }
+    assets_folder_name = 'assets'
+    match format:
+        case OutputFormatsEnum.cvsx:
+            # the CVSX writer fills the provided in-memory buffer itself
+            _write_cvsx_to_file(file=io.BytesIO(), responses=responses, indexJson=indexJson, out_path=out_path)
+        case OutputFormatsEnum.mvsx:
+            # NOTE: _write_mvsx_to_file takes no buffer; it builds the zip via write_in_memory_zip
+            _write_mvsx_to_file(responses=responses, indexJson=indexJson, out_path=out_path, assets_folder_name=assets_folder_name)
+        case _:
+            raise Exception(f'Format {format} is not supported.')
 
 def _get_timeframes_from_timeinfo(t: TimeInfo, segmentation_id: str):
     return list(range(t[segmentation_id]["start"], t[segmentation_id]["end"] + 1))
@@ -516,12 +701,12 @@ async def query(args: argparse.Namespace):
     # 1. Parse argparse args
     parsed_args = _parse_argparse_args(args)
     # 2. Parse json params
-    parsed_params = _parse_json_params(parsed_args["json_params_path"])
+    parsed_params = _parse_json_params(parsed_args.json_params_path)
 
-    entry_id = parsed_params["entry_id"]
-    source_db = parsed_params["source_db"]
+    entry_id = parsed_params.entry_id
+    source_db = parsed_params.source_db
 
-    db = FileSystemVolumeServerDB(folder=Path(parsed_args["db_path"]))
+    db = FileSystemVolumeServerDB(folder=Path(parsed_args.db_path))
 
     # initialize server
     volume_server = VolumeServerService(db)
@@ -529,7 +714,7 @@ async def query(args: argparse.Namespace):
     # 3. query metadata
     metadata = await volume_server.get_metadata(
         req=MetadataRequest(
-            source=parsed_params["source_db"], structure_id=parsed_params["entry_id"]
+            source=parsed_params.source_db, structure_id=parsed_params.entry_id
         )
     )
     grid_metadata: Metadata = metadata["grid"]
@@ -606,7 +791,7 @@ async def query(args: argparse.Namespace):
         r = await query.execute()
         responses.append(r)
 
-    _write_to_file(responses, parsed_args["out"])
+    _write_to_file(responses, parsed_args.out, parsed_args.format)
 
 
 async def main():
@@ -616,14 +801,16 @@ async def main():
     # common_subparsers = main_parser.add_subparsers(title='Query type', dest='query_type', help='Select one of: ')
 
     # COMMON ARGUMENTS
+    # TODO: check that the extension of --out agrees with --format, or drop the extension from --out entirely
     required_named = main_parser.add_argument_group("Required named arguments")
+    required_named.add_argument("--format", type=str, choices=['mvsx', 'cvsx'], required=True, help="Produce a CVSX or MVSX file as output")
     required_named.add_argument("--db_path", type=str, required=True, help="Path to db")
-    # TODO: exclude extension
     required_named.add_argument(
         "--out", type=str, required=True, help="Path to output file including extension"
    )
     required_named.add_argument(
-        "--json-params-path",
+        "--json_params_path",
         required=True,
         type=str,
         help="Path to .json file with query parameters",
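# Example invocation (illustrative; paths are made up):
#
#   python vs_toolkit.py \
#       --format mvsx \
#       --db_path ./test-data/db \
#       --out ./working_folder/output.mvsx \
#       --json_params_path ./params.json
#
# --format selects the dispatch branch in _write_to_file; for mvsx output,
# --out must carry the .mvsx extension until the extension/format TODO above
# is resolved, since _create_mvsx asserts on out_path.suffix.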