feat: skeletonize (most) autapses and improve cross section analysis #179

Merged · 50 commits · Aug 14, 2024

Commits
9a6b8c4
wip(skeletonizing): add support for fixing autapses in graphene volumes
william-silversmith Jan 16, 2024
08a8151
feat: add support for graphene timestamp
william-silversmith Jan 16, 2024
7f2e487
wip: ensure shape matches atomic chunks
william-silversmith Jan 17, 2024
2b892c0
fix: add back in xyzrange
william-silversmith May 13, 2024
dcfc938
fix(SkeletonTask): generalize frag_path for windows
william-silversmith May 14, 2024
f3e138f
fix(SkeletonTask): use agglomerate and timestamp correctly
william-silversmith May 14, 2024
971eb14
fix: merge error removed cross_sectional_area params
william-silversmith May 14, 2024
a3ab5bb
fix(SkeletonTask): apply timestamp in correct location
william-silversmith May 18, 2024
86a8a8f
fix: fixup the edges of the vcg with root vcg
william-silversmith May 22, 2024
7238b3a
feat: remove requirements for a queue parameter
william-silversmith May 24, 2024
3392897
redesign: use vol.info to get skeleton path
william-silversmith May 24, 2024
6ff5f7e
fix: several errors
william-silversmith May 24, 2024
9838867
feat: add fix_autapses to cli
william-silversmith May 24, 2024
cb3e543
fix: compensate for mip levels
william-silversmith May 24, 2024
3611bc8
fix: wrong path joining
william-silversmith May 24, 2024
3bc9755
fix: ensure mesh bounds are rectified after expansion
william-silversmith May 24, 2024
e6b9174
fix: wrong variable name in shard downsample
william-silversmith Jun 9, 2024
329db4b
fix(shards/image): fix situation where chunk.z == dataset.z
william-silversmith Jul 24, 2024
6208048
wip(skeletonizing): add support for fixing autapses in graphene volumes
william-silversmith Jan 16, 2024
894b2fa
feat: add support for graphene timestamp
william-silversmith Jan 16, 2024
3176128
wip: ensure shape matches atomic chunks
william-silversmith Jan 17, 2024
91bed59
fix: add back in xyzrange
william-silversmith May 13, 2024
bc215ce
fix(SkeletonTask): generalize frag_path for windows
william-silversmith May 14, 2024
f4a513f
fix(SkeletonTask): use agglomerate and timestamp correctly
william-silversmith May 14, 2024
2cfed74
fix: merge error removed cross_sectional_area params
william-silversmith May 14, 2024
548fe0b
fix(SkeletonTask): apply timestamp in correct location
william-silversmith May 18, 2024
2fb01ac
fix: fixup the edges of the vcg with root vcg
william-silversmith May 22, 2024
521c9d2
feat: remove requirements for a queue parameter
william-silversmith May 24, 2024
f50059b
redesign: use vol.info to get skeleton path
william-silversmith May 24, 2024
6f5e094
fix: several errors
william-silversmith May 24, 2024
80ed2c7
feat: add fix_autapses to cli
william-silversmith May 24, 2024
37e9dc6
fix: compensate for mip levels
william-silversmith May 24, 2024
fe9267f
fix: wrong path joining
william-silversmith May 24, 2024
e7809da
fix: ensure mesh bounds are rectified after expansion
william-silversmith May 24, 2024
67e18cc
fix: don't import view from CloudVolume
Jul 29, 2024
d8da603
fix: don't import view from cloudvolume
Jul 29, 2024
73202cd
Merge branch 'wms_autapse' of github.com:seung-lab/igneous into wms_a…
william-silversmith Jul 30, 2024
9f9c0b0
fix: intelligent frag path resolution
william-silversmith Jul 30, 2024
e028820
fix: handle binarization issue in repairing
william-silversmith Jul 30, 2024
52b5bb8
fix: smart path
william-silversmith Jul 31, 2024
12c3117
perf: skip downloading the "huge" bbox if no skeletons
Jul 31, 2024
be2b462
fix: ensure info file uploaded to frag path
william-silversmith Aug 2, 2024
1a7b16f
Merge branch 'wms_autapse' of github.com:seung-lab/igneous into wms_a…
william-silversmith Aug 2, 2024
586bdf1
fix: check for key
william-silversmith Aug 2, 2024
be59802
feat: support agglomeration for graphene for cross sections
william-silversmith Aug 6, 2024
32ed272
feat: support static root_ids for a graphene volume
william-silversmith Aug 6, 2024
7e915c0
fix: regression in LuminanceLevelsTask due to CloudVolume upgrade
william-silversmith Aug 8, 2024
42bcde1
refactor: simplify vcg code
william-silversmith Aug 10, 2024
c85eee9
feat: add encoding level support for jpegxl
william-silversmith Aug 13, 2024
27ce91b
fix: don't print error if max_mips is non-zero
william-silversmith Aug 14, 2024
2 changes: 2 additions & 0 deletions igneous/task_creation/common.py
@@ -226,6 +226,8 @@ def set_encoding(cv, mip, encoding, encoding_level):

   if encoding == "jpeg":
     scale["jpeg_quality"] = encoding_level
+  elif encoding == "jpegxl":
+    scale["jpegxl_quality"] = encoding_level
   elif encoding == "png":
     scale["png_level"] = encoding_level
   elif encoding == "fpzip":
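The new jpegxl branch mirrors the existing jpeg handling: the requested encoding_level is recorded as jpegxl_quality on the selected scale. A minimal sketch of exercising the patched helper (the dataset path is hypothetical, and whether you commit the info afterwards depends on your workflow):

from cloudvolume import CloudVolume
from igneous.task_creation.common import set_encoding

cv = CloudVolume("gs://bucket/dataset")  # hypothetical dataset path
# Ask for JPEG XL at quality 90 on mip 0; with this patch the level lands
# in scale["jpegxl_quality"] on that mip's scale entry.
set_encoding(cv, mip=0, encoding="jpegxl", encoding_level=90)
cv.commit_info()  # persist the updated scale to the info file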
30 changes: 20 additions & 10 deletions igneous/task_creation/image.py
@@ -406,14 +406,6 @@ def create_sharded_image_info(
   # maximum amount of information in the morton codes
   grid_size = np.ceil(Vec(*dataset_size) / Vec(*chunk_size)).astype(np.int64)
   max_bits = sum([ math.ceil(math.log2(size)) for size in grid_size ])
-  if max_bits > 64:
-    raise ValueError(
-      f"{max_bits}, more than a 64-bit integer, "
-      "would be required to describe the chunk positions "
-      "in this dataset. Try increasing the chunk size or "
-      "increasing dataset bounds."
-      f"Dataset Size: {dataset_size} Chunk Size: {chunk_size}"
-    )

   chunks_per_shard = math.ceil(uncompressed_shard_bytesize / (chunk_voxels * byte_width))
   chunks_per_shard = 2 ** int(math.log2(chunks_per_shard))
@@ -423,7 +415,7 @@

   # approximate, would need to account for rounding effects to be exact
   # rounding is corrected for via max_bits - pre - mini below.
-  num_shards = num_chunks / chunks_per_shard
+  num_shards = num_chunks / chunks_per_shard

   def update_bits():
     shard_bits = int(math.ceil(math.log2(num_shards)))
@@ -465,7 +457,25 @@ def update_bits():
   # in the morton codes, so if there's any slack from rounding, the
   # remainder goes into shard bits.
   preshift_bits = preshift_bits - minishard_bits
-  shard_bits = max_bits - preshift_bits - minishard_bits
+  if dataset_size[2] == chunk_size[2]:
+    additional_bits = (preshift_bits // 3)
+    i = 0
+    while i < additional_bits:
+      max_bits += 1
+      preshift_bits += 1
+      if preshift_bits % 3 != 0:
+        i += 1
+
+  shard_bits = max(max_bits - preshift_bits - minishard_bits, 0)
+
+  if max_bits > 64:
+    raise ValueError(
+      f"{max_bits}, more than a 64-bit integer, "
+      "would be required to describe the chunk positions "
+      "in this dataset. Try increasing the chunk size or "
+      "increasing dataset bounds."
+      f"Dataset Size: {dataset_size} Chunk Size: {chunk_size}"
+    )

   if preshift_bits < 0:
     raise ValueError(f"Preshift bits cannot be negative. ({shard_bits}, {minishard_bits}, {preshift_bits}), total info: {max_bits} bits")
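The new branch widens preshift_bits when the dataset is only one chunk thick in z, then recomputes shard_bits from whatever bits remain, and the 64-bit sanity check now runs after the adjustment since max_bits can grow. A standalone sketch of the bit budget being allocated (the sizes and field widths are hypothetical values, and igneous additionally corrects for rounding via update_bits):

import math

dataset_size = (4096, 4096, 64)  # hypothetical volume, one chunk thick in z
chunk_size   = (128, 128, 64)

grid_size = [math.ceil(d / c) for d, c in zip(dataset_size, chunk_size)]
# bits needed to Morton-encode any chunk position: 5 (x) + 5 (y) + 0 (z)
max_bits = sum(math.ceil(math.log2(g)) for g in grid_size)

# A chunk's code splits into three fields:
#   [ shard bits | minishard bits | preshift bits ]
# dataset z == chunk z means z contributes no bits, which is the case the
# patch above compensates for by growing the preshift field.
preshift_bits, minishard_bits = 6, 3
shard_bits = max(max_bits - preshift_bits - minishard_bits, 0)
print(grid_size, max_bits, shard_bits)  # [32, 32, 1] 10 1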
47 changes: 45 additions & 2 deletions igneous/task_creation/skeleton.py
@@ -15,7 +15,7 @@
 from cloudvolume import CloudVolume
 from cloudvolume.lib import Vec, Bbox, max2, min2, xyzrange, find_closest_divisor, yellow, jsonify
 from cloudvolume.datasource.precomputed.sharding import ShardingSpecification
-from cloudfiles import CloudFiles
+from cloudfiles import CloudFiles, CloudFile

 from igneous.tasks import (
   SkeletonTask, UnshardedSkeletonMergeTask,
@@ -58,6 +58,7 @@ def bounds_from_mesh(
     bbxes.append(bounds)

   bounds = Bbox.expand(*bbxes)
+  bounds = bounds.expand_to_chunk_size(shape, offset=vol.voxel_offset)
   return Bbox.clamp(bounds, vol.bounds)
@@ -71,9 +72,11 @@ def create_skeletonizing_tasks(
   parallel=1, fill_missing=False,
   sharded=False, frag_path=None, spatial_index=True,
   synapses=None, num_synapses=None,
-  dust_global=False,
+  dust_global=False, fix_autapses=False,
   cross_sectional_area=False,
   cross_sectional_area_smoothing_window=5,
+  timestamp=None,
+  root_ids_cloudpath=None,
 ):
   """
   Assign tasks with one voxel overlap in a regular grid
@@ -121,6 +124,17 @@
   fix_borders: Allows trivial merging of single overlap tasks. You'll only
     want to set this to false if you're working on single or non-overlapping
     volumes.
+  fix_autapses: Only possible for graphene volumes. Uses PyChunkGraph (PCG) information
+    to fix autapses (when a neuron synapses onto itself). This requires splitting
+    contacts between the edges of two touching voxels. The algorithm for doing this
+    requires much more memory.
+
+    This works by comparing the PCG L2 and root layers. L1 is the watershed layer. L2
+    represents connectivity only within an atomic chunk. The root layer provides the
+    global connectivity. Autapses can be distinguished at the L2 level; above that,
+    they may not be (and certainly not at the root level). We extract the voxel
+    connectivity graph from L2 and perform the overall trace at root connectivity.
+
   dust_threshold: don't skeletonize labels smaller than this number of voxels
     as seen by a single task.
   dust_global: Use global voxel counts for the dust threshold instead of from
@@ -155,10 +169,24 @@
     to the total computation.)
   cross_sectional_area_smoothing_window: Perform a rolling average of the
     normal vectors across these many vectors.
+  timestamp: for graphene volumes only, you can specify the timepoint to use
+  root_ids_cloudpath: for graphene volumes, if you have a materialized archive
+    of your desired timepoint, you can use this path for fetching root ID
+    segmentation as it is far more efficient.
   """
   shape = Vec(*shape)
   vol = CloudVolume(cloudpath, mip=mip, info=info)

+  if fix_autapses:
+    if vol.meta.path.format != "graphene":
+      raise ValueError("fix_autapses can only be performed on graphene volumes.")
+
+    if not np.all(shape % vol.meta.graph_chunk_size == 0):
+      raise ValueError(
+        f"shape must be a multiple of the graph chunk size. Got: {shape}, "
+        f"{vol.meta.graph_chunk_size}"
+      )
+
   if dust_threshold > 0 and dust_global:
     cf = CloudFiles(cloudpath)
     vxctfile = cf.join(vol.key, 'stats', 'voxel_counts.mb')
@@ -201,6 +229,15 @@

   vol.skeleton.meta.commit_info()

+  if frag_path:
+    frag_info_path = CloudFiles(frag_path).join(frag_path, "info")
+    frag_info = CloudFile(frag_info_path).get_json()
+    if not frag_info:
+      CloudFile(frag_info_path).put_json(vol.skeleton.meta.info)
+    elif 'scales' in frag_info:
+      frag_info_path = CloudFiles(frag_path).join(frag_path, vol.info["skeletons"], "info")
+      CloudFile(frag_info_path).put_json(vol.skeleton.meta.info)
+
   will_postprocess = bool(np.any(vol.bounds.size3() > shape))
   bounds = vol.bounds.clone()
@@ -247,8 +284,11 @@ def task(self, shape, offset):
       spatial_grid_shape=shape.clone(), # used for writing index filenames
       synapses=bbox_synapses,
       dust_global=dust_global,
+      fix_autapses=bool(fix_autapses),
+      timestamp=timestamp,
       cross_sectional_area=bool(cross_sectional_area),
       cross_sectional_area_smoothing_window=int(cross_sectional_area_smoothing_window),
+      root_ids_cloudpath=root_ids_cloudpath,
     )

   def synapses_for_bbox(self, shape, offset):
@@ -292,8 +332,11 @@ def on_finish(self):
       'spatial_index': bool(spatial_index),
       'synapses': bool(synapses),
       'dust_global': bool(dust_global),
+      'fix_autapses': bool(fix_autapses),
+      'timestamp': timestamp,
       'cross_sectional_area': bool(cross_sectional_area),
       'cross_sectional_area_smoothing_window': int(cross_sectional_area_smoothing_window),
+      'root_ids_cloudpath': root_ids_cloudpath,
     },
     'by': operator_contact(),
     'date': strftime('%Y-%m-%d %H:%M %Z'),
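Taken together, the new parameters are passed like any other create_skeletonizing_tasks options. A minimal sketch of queueing autapse-aware skeletonization (the cloudpaths, queue, and parameter values are hypothetical; the keyword names come from the signature above):

from taskqueue import TaskQueue
import igneous.task_creation as tc

tasks = tc.create_skeletonizing_tasks(
  "graphene://https://example.com/segmentation/table/my_dataset",  # hypothetical
  mip=2,
  shape=(512, 512, 512),  # must be a multiple of vol.meta.graph_chunk_size
  sharded=True,
  frag_path="gs://scratch/skeleton_frags",  # hypothetical scratch bucket
  fix_autapses=True,      # graphene volumes only
  timestamp=1718000000,   # hypothetical proofreading timepoint
  root_ids_cloudpath="gs://bucket/materialized_roots",  # optional, faster root ID reads
  cross_sectional_area=True,
  cross_sectional_area_smoothing_window=5,
)

tq = TaskQueue("sqs://skeleton-queue")  # hypothetical queue
tq.insert(tasks)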
8 changes: 6 additions & 2 deletions igneous/tasks/image/image.py
@@ -70,7 +70,7 @@ def downsample_and_upload(
   if max_mips is not None:
     factors = factors[:max_mips]

-  if len(factors) == 0:
+  if len(factors) == 0 and max_mips:
     print("No factors generated. Image Shape: {}, Downsample Shape: {}, Volume Shape: {}, Bounds: {}".format(
       image.shape, ds_shape, vol.volume_size, bounds)
     )
@@ -327,6 +327,9 @@ def execute(self):
       cts = np.bincount(img2d)
       levels[0:len(cts)] += cts.astype(np.uint64)

+    if len(bboxes) == 0:
+      return
+
     covered_area = sum([bbx.volume() for bbx in bboxes])

     bboxes = [(bbox.volume(), bbox.size3()) for bbox in bboxes]
@@ -376,7 +379,8 @@ def select_bounding_boxes(self, dataset_bounds):
       patch_start += self.offset
       bbox = Bbox(patch_start, patch_start + sample_shape.size3())
       bbox = Bbox.clamp(bbox, dataset_bounds)
-      bboxes.append(bbox)
+      if not bbox.subvoxel():
+        bboxes.append(bbox)
     return bboxes

 @queueable
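The select_bounding_boxes guard matters because Bbox.clamp can collapse a patch that falls partly or wholly outside the dataset into a box with no volume, and subvoxel() catches those. A small illustration using cloudvolume's Bbox (the coordinates are hypothetical):

from cloudvolume.lib import Bbox

dataset_bounds = Bbox((0, 0, 0), (1024, 1024, 128))
patch = Bbox((2048, 2048, 0), (2304, 2304, 64))  # entirely outside in x/y

clamped = Bbox.clamp(patch, dataset_bounds)
print(clamped.volume())    # 0 (clamping collapsed the box)
print(clamped.subvoxel())  # True, so the sampling task now skips it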