From a74327d0db136c82358d954ba6c2d26dc3d2e09b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sophia=20M=C3=A4dler?=
 <15019107+sophiamaedler@users.noreply.github.com>
Date: Mon, 10 Feb 2025 16:36:46 +0100
Subject: [PATCH] [FEATURE] make rechunking optional when reading from an
 existing spatialdata object

It should be possible to turn off rechunking when reading from an existing spatialdata object, under the assumption that the chunk size was already set sensibly. This avoids having to recalculate already existing chunks, which adds a lot of overhead during writing.
---
 src/scportrait/pipeline/project.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/scportrait/pipeline/project.py b/src/scportrait/pipeline/project.py
index ef2d5112..9f4b36c1 100644
--- a/src/scportrait/pipeline/project.py
+++ b/src/scportrait/pipeline/project.py
@@ -846,6 +846,7 @@ def load_input_from_sdata(
         overwrite: bool | None = None,
         keep_all: bool = True,
         remove_duplicates: bool = True,
+        rechunk: bool = False,
     ) -> None:
         """
         Load input image from a spatialdata object.
@@ -890,8 +891,9 @@ def load_input_from_sdata(
             image_c, image_x, image_y = image.scale0.image.shape
 
             # ensure chunking is correct
-            for scale in image:
-                self._check_chunk_size(image[scale].image, chunk_size=self.DEFAULT_CHUNK_SIZE_3D)
+            if rechunk:
+                for scale in image:
+                    self._check_chunk_size(image[scale].image, chunk_size=self.DEFAULT_CHUNK_SIZE_3D)
 
             # get channel names
             channel_names = image.scale0.image.c.values
@@ -900,7 +902,8 @@ def load_input_from_sdata(
             image_c, image_x, image_y = image.shape
 
             # ensure chunking is correct
-            self._check_chunk_size(image, chunk_size=self.DEFAULT_CHUNK_SIZE_3D)
+            if rechunk:
+                self._check_chunk_size(image, chunk_size=self.DEFAULT_CHUNK_SIZE_3D)
 
             channel_names = image.c.values
 
@@ -931,7 +934,9 @@ def load_input_from_sdata(
                 mask_y == image_y
             ), "Nucleus segmentation mask does not match input image size."
 
-            self._check_chunk_size(mask, chunk_size=self.DEFAULT_CHUNK_SIZE_2D)  # ensure chunking is correct
+            if rechunk:
+                self._check_chunk_size(mask, chunk_size=self.DEFAULT_CHUNK_SIZE_2D)  # ensure chunking is correct
+
             self.filehandler._write_segmentation_object_sdata(mask, self.nuc_seg_name)
 
         # check if a cytosol segmentation exists and if so add it to the sdata object
@@ -951,7 +956,9 @@ def load_input_from_sdata(
                 mask_y == image_y
             ), "Nucleus segmentation mask does not match input image size."
 
-            self._check_chunk_size(mask, chunk_size=self.DEFAULT_CHUNK_SIZE_2D)  # ensure chunking is correct
+            if rechunk:
+                self._check_chunk_size(mask, chunk_size=self.DEFAULT_CHUNK_SIZE_2D)  # ensure chunking is correct
+
             self.filehandler._write_segmentation_object_sdata(mask, self.cyto_seg_name)
 
         self.get_project_status()