Skip to content

Commit

Permalink
Merge pull request #2 from chris-delg/refactor
Browse files Browse the repository at this point in the history
refactored modules to use classes in order to efficiently pass in ne…
  • Loading branch information
chris-delg authored Jul 11, 2024
2 parents 7a8ea45 + dd6dda1 commit 25c77df
Show file tree
Hide file tree
Showing 6 changed files with 328 additions and 295 deletions.
145 changes: 16 additions & 129 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,148 +1,35 @@
from pathlib import Path
import shutil
import matplotlib.pyplot as plt
import zarr_libraries
from zarr_libraries.tensorstore import tensorstore_zarr
from zarr_libraries.acquire import acquire_zarr
from zarr_libraries.zarr_python import zarr_python
from zarr_libraries.ome_ngff import ome_zarr
from zarr_libraries import *


abs_path_to_data = str(Path(__file__).parent / "zarr_libraries/example_data/")
frame_multipliers = [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20,
25, 30, 35, 40, 45, 50, 60, 70, 80, 100
]


def acquire_radialSin_test() -> None:
    """Benchmark Acquire writing radial-sine zarr data at several sizes.

    Only the first two and the final frame multipliers are benchmarked
    (the middle of the list is skipped). Each run writes a zarr folder,
    records its size and write bandwidth, then deletes the folder.
    Results are plotted on the current matplotlib figure.
    """
    print("\nRadial sin Zarr data\n---------------------\n")
    file_sizes = []
    bandwidths = []

    last_index = len(frame_multipliers) - 1
    for i, multiplier in enumerate(frame_multipliers):
        # skip everything between the second and last multiplier
        if 1 < i < last_index:
            continue
        zarr_path = abs_path_to_data + f"/acquire_data/radialSin{multiplier}.zarr"
        acquire_zarr.setup_camera(
            sim_type = "simulated: uniform random",
            folder_path = zarr_path,
            max_frames = 64 * multiplier
        )
        print(f"Acquire Folder {multiplier}")
        acquire_zarr.setup_dimensions(
            x_array_size=1920, # set to 1920
            x_chunk_size=960,  # set to 960
            y_array_size=1080, # set to 1080
            y_chunk_size=540,  # set to 540
            t_array_size=0,    # set to 0
            t_chunk_size=64    # set to 64
        )
        elapsed = acquire_zarr.create_zarr()
        size = zarr_libraries.folder_size(folder_path = zarr_path + "/0")
        shutil.rmtree(zarr_path)
        file_sizes.append(size * 10**-9)               # bytes -> GB
        bandwidths.append(size * 10**-9 / elapsed)     # GB/s

    print("--------------------------------------------------------------\n\n")
    plt.plot(file_sizes, bandwidths, label="Acquire Writes")


def tensorstore_radialSin_copy_test() -> None:
    """Copy each radial-sine zarr folder with TensorStore and report its size."""
    print("\nRadial sin Zarr data\n---------------------\n")

    for multiplier in frame_multipliers:
        print(f"TensorStore Folder {multiplier}")
        zarr_path = abs_path_to_data + f"/tensorstore_data/radialSinTs{multiplier}.zarr"
        # NOTE(review): source and result are the same path — confirm this is intended
        tensorstore_zarr.copy_zarr(
            source_path = zarr_path,
            result_path = zarr_path
        )
        zarr_libraries.folder_size(folder_path = zarr_path)

    print("--------------------------------------------------------------\n\n")


def tensorstore_continuous_write_test(append_dim_size: int) -> None:
    """Run the TensorStore continuous-write benchmark and plot size vs bandwidth."""
    print("\n\n--------Tensorstore Stress Test--------\n\n")
    target = abs_path_to_data + "/tensorstore_data/stressTest.zarr"
    sizes_gb, rates = tensorstore_zarr.continuous_write(
        result_path = target,
        append_dim_size = append_dim_size
    )
    print("--------------------------------------------------------------\n\n")
    plt.plot(sizes_gb, rates, label="tensorstore writes")


def tensorstore_continuous_append_test(append_dim_size: int) -> None:
    """Run the TensorStore continuous-append benchmark and plot size vs bandwidth.

    Args:
        append_dim_size: number of append iterations forwarded to
            ``tensorstore_zarr.continuous_write_append``.
    """
    # Fixed banner: previously said "Tensorstore Stress Test", which was
    # indistinguishable from the write test's banner and inconsistent with
    # the zarr-python / ome-zarr append tests ("... Append Stress Test").
    print("\n\n--------Tensorstore Append Stress Test--------\n\n")
    file_sizes, bandwidths = tensorstore_zarr.continuous_write_append(
        result_path = abs_path_to_data + "/tensorstore_data/stressTestAppend.zarr",
        append_dim_size = append_dim_size
    )
    print("--------------------------------------------------------------\n\n")
    plt.plot(file_sizes, bandwidths, label="tensorstore append")


def zarr_python_continuous_write_test(append_dim_size: int) -> None:
    """Run the Zarr-Python continuous-write benchmark and plot size vs bandwidth."""
    print("\n\n--------Zarr-Python Stress Test--------\n\n")
    target = abs_path_to_data + "/zarr_python_data/stressTest.zarr"
    sizes_gb, rates = zarr_python.continuous_write(
        result_path = target,
        append_dim_size = append_dim_size
    )
    print("--------------------------------------------------------------\n\n")
    plt.plot(sizes_gb, rates, label="zarr-python writes")


def zarr_python_continuous_append_test(append_dim_size: int) -> None:
    """Run the Zarr-Python continuous-append benchmark and plot size vs bandwidth."""
    print("\n\n--------Zarr-Python Append Stress Test--------\n\n")
    target = abs_path_to_data + "/zarr_python_data/stressTestAppend.zarr"
    sizes_gb, rates = zarr_python.continuous_write_append(
        result_path = target,
        append_dim_size = append_dim_size
    )
    print("--------------------------------------------------------------\n\n")
    plt.plot(sizes_gb, rates, label="zarr-python append")


def ome_zarr_continuous_write_test(append_dim_size: int) -> None:
    """Run the OME-Zarr continuous-write benchmark and plot size vs bandwidth."""
    print("\n\n--------OME-Zarr Stress Test--------\n\n")
    target = abs_path_to_data + "/ome_zarr_data/stressTest.zarr"
    sizes_gb, rates = ome_zarr.continuous_write(
        result_path = target,
        append_dim_size = append_dim_size
    )
    print("--------------------------------------------------------------\n\n")
    plt.plot(sizes_gb, rates, label="ome-zarr writes")


def ome_zarr_continuous_append_test(append_dim_size: int) -> None:
    """Run the OME-Zarr continuous-append benchmark and plot size vs bandwidth."""
    print("\n\n--------Ome-Zarr Append Stress Test--------\n\n")
    target = abs_path_to_data + "/ome_zarr_data/stressTestAppend.zarr"
    sizes_gb, rates = ome_zarr.continuous_write_append(
        result_path = target,
        append_dim_size = append_dim_size
    )
    print("--------------------------------------------------------------\n\n")
    plt.plot(sizes_gb, rates, label="ome-zarr append")


if __name__ == "__main__":
def main() -> None:
zarr_python = Zarr_Python(shape=[64, 1080, 1920], chunks=[64, 540, 960])
tensorstore = Tensorstore(shape=[64, 1080, 1920], chunks=[64, 540, 960])
ome_zarr = Ome_Zarr(shape=[64, 1080, 1920], chunks=[64, 540, 960])
'''
Append Tests:
- These tests benchmark the continuous appending to a single zarr folder
- The append dimension size passed in equates to ~26 gigs of data
'''
zarr_python_continuous_append_test(append_dim_size=300)
tensorstore_continuous_append_test(append_dim_size=300)
# at around 17 gigs of data ome-zarr throws an error, append_dim_size = 12 is right before that happens
#ome_zarr_continuous_append_test(append_dim_size=12)
#zarr_python.continuous_append_test(append_dim_size=30)
#tensorstore.continuous_append_test(append_dim_size=12)
# at around 17 gigs of data ome-zarr throws an error
#ome_zarr.continuous_append_test(append_dim_size=12)

'''
Continuous write tests:
- These tests benchmark the creation of many increasingly large zarr folders
'''
#acquire_radialSin_test()
#tensorstore_radialSin_copy_test()
#tensorstore_continuous_write_test(append_dim_size=30)
#zarr_python_continuous_write_test(append_dim_size=30)
#ome_zarr_continuous_write_test(append_dim_size=30)
#tensorstore.continuous_write_test(append_dim_size=10)
#zarr_python.continuous_write_test(append_dim_size=10)
#ome_zarr.continuous_write_test(append_dim_size=10)

plt.legend()
plt.xlabel("Data Size (GB)")
plt.ylabel("Bandwidth (GBps)")
plt.show()


if __name__ == "__main__":
main()
5 changes: 4 additions & 1 deletion zarr_libraries/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
from zarr_libraries.common import *
from zarr_libraries.common import *
from zarr_libraries.tensorstore.tensorstore_zarr import *
from zarr_libraries.zarr_python.zarr_python import *
from zarr_libraries.ome_ngff.ome_zarr import *
43 changes: 42 additions & 1 deletion zarr_libraries/acquire/acquire_zarr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import acquire
import time

import matplotlib.pyplot as plt
from zarr_libraries import folder_size
import shutil
from pathlib import Path

global runtime
global dm
Expand Down Expand Up @@ -65,3 +68,41 @@ def create_zarr() -> int:
total_time = time.perf_counter() - t
print(f"Acquire -> creating zarr : {total_time} seconds")
return total_time


def acquire_radialSin_test() -> None:
    """Benchmark Acquire writing radial-sine zarr data at several sizes.

    Only the first two and the final frame multipliers are benchmarked
    (the middle of the list is skipped). Each run configures the simulated
    camera, writes a zarr folder, records its size and write bandwidth,
    then deletes the folder. Results are plotted on the current figure.
    """
    # Base path already points at .../example_data/acquire_data.
    abs_path_to_data = str((Path(__file__).parent / "../example_data/acquire_data").resolve())
    frame_multipliers = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
        12, 13, 14, 15, 16, 17, 18, 19, 20,
        25, 30, 35, 40, 45, 50, 60, 70, 80, 100
    ]

    print("\nRadial sin Zarr data\n---------------------\n")
    file_sizes = []
    bandwidths = []

    for i, multiplier in enumerate(frame_multipliers):
        # skip everything between the second and last multiplier
        if 1 < i < len(frame_multipliers) - 1: continue
        # BUGFIX: paths previously appended "/acquire_data/..." onto a base
        # that already ends in ".../acquire_data", yielding a doubled
        # "acquire_data/acquire_data" directory that never matched where
        # Acquire actually wrote.
        zarr_path = abs_path_to_data + f"/radialSin{multiplier}.zarr"
        setup_camera(
            sim_type = "simulated: uniform random",
            folder_path = zarr_path,
            max_frames = 64 * multiplier
        )
        print(f"Acquire Folder {multiplier}")
        setup_dimensions(
            x_array_size=1920, # set to 1920
            x_chunk_size=960,  # set to 960
            y_array_size=1080, # set to 1080
            y_chunk_size=540,  # set to 540
            t_array_size=0,    # set to 0
            t_chunk_size=64    # set to 64
        )
        # renamed from `time`: that local shadowed the imported time module
        elapsed = create_zarr()
        size = folder_size(folder_path = zarr_path + "/0")
        shutil.rmtree(zarr_path)
        file_sizes.append(size * 10**-9)               # bytes -> GB
        bandwidths.append(size * 10**-9 / elapsed)     # GB/s

    print("--------------------------------------------------------------\n\n")
    plt.plot(file_sizes, bandwidths, label="Acquire Writes")
114 changes: 74 additions & 40 deletions zarr_libraries/ome_ngff/ome_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,55 +5,89 @@
from zarr_libraries import folder_size
from ome_zarr.io import parse_url
from ome_zarr.writer import write_image
from pathlib import Path
import matplotlib.pyplot as plt
import sys


def continuous_write(result_path: str, append_dim_size: int) -> tuple[list, list]:
file_sizes = []
bandwidths = []
class Ome_Zarr:
def __init__(self, shape: list, chunks: list) -> None:
self.abs_path_to_data = str((Path(__file__).parent / "../example_data/ome_zarr_data").resolve())
self.shape = shape
self.chunks = chunks

for i in range(1, append_dim_size + 1):

def __continuous_write(self, result_path: str, append_dim_size: int) -> tuple[list, list]:
file_sizes = []
bandwidths = []

for i in range(1, append_dim_size + 1):
new_shape = (self.shape[0] * i, *self.shape[1:]) # modify the append dimension, unpack the rest

store = parse_url(result_path, mode="w").store
root = zarr.group(store=store)

t = time.perf_counter()
zarr_data = np.random.randint(low=0, high=256, size=new_shape, dtype=np.uint8)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=self.chunks))
total_time = time.perf_counter() - t

print(f"Write #{i}\nOME-Zarr -> creating zarr : {total_time} seconds")
size = folder_size(result_path)
file_sizes.append(size * 10**-9) # converts bytes to GB
bandwidths.append((size * 10**-9) / total_time) # GB/s
shutil.rmtree(result_path)

return file_sizes, bandwidths


def __continuous_append(self, result_path: str, append_dim_size: int) -> tuple[list, list]:
file_sizes = []
bandwidths = []
total_time = 0

store = parse_url(result_path, mode="w").store
root = zarr.group(store=store)

t = time.perf_counter()
zarr_data = np.random.randint(low=0, high=256, size=((64 * i), 1080, 1920), dtype=np.uint8)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=(64, 540, 960)))
total_time = time.perf_counter() - t

print(f"Write #{i}\nOME-Zarr -> creating zarr : {total_time} seconds")
size = folder_size(result_path)
file_sizes.append(size * 10**-9) # converts bytes to GB
bandwidths.append((size * 10**-9) / total_time) # GB/s
zarr_data = np.random.randint(low=0, high=256, size=self.shape, dtype=np.uint8)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=self.chunks))
total_time += time.perf_counter() - t
shutil.rmtree(result_path)

return file_sizes, bandwidths


def continuous_write_append(result_path: str, append_dim_size: int) -> tuple[list, list]:
file_sizes = []
bandwidths = []
total_time = 0

store = parse_url(result_path, mode="w").store
root = zarr.group(store=store)
for i in range(2, append_dim_size + 1):
t = time.perf_counter()
new_data = np.random.randint(low=0, high=256, size=self.shape, dtype=np.uint8)
zarr_data = np.concatenate((zarr_data, new_data), axis=0)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=self.chunks))
total_time += time.perf_counter() - t

print(f"Write #{i}\nOME-Zarr -> append zarr : {total_time} seconds")
size = folder_size(result_path)
file_sizes.append(size * 10**-9) # converts bytes to GB
bandwidths.append((size * 10**-9) / total_time) # GB/s
shutil.rmtree(result_path)

return file_sizes, bandwidths

t = time.perf_counter()
zarr_data = np.random.randint(low=0, high=256, size=(64, 1080, 1920), dtype=np.uint8)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=(64, 540, 960)))
total_time += time.perf_counter() - t
shutil.rmtree(result_path)

for i in range(2, append_dim_size + 1):
t = time.perf_counter()
new_data = np.random.randint(low=0, high=256, size=((64 * i), 1080, 1920), dtype=np.uint8)
zarr_data = np.concatenate((zarr_data, new_data), axis=0)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=(64, 540, 960)))
total_time += time.perf_counter() - t
def continuous_write_test(self, append_dim_size: int) -> None:
print("\n\n--------OME-Zarr Stress Test--------\n\n")
file_sizes, bandwidths = self.__continuous_write(
result_path = self.abs_path_to_data + "/stressTest.zarr",
append_dim_size = append_dim_size
)
print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="ome-zarr writes")

print(f"Write #{i}\nOME-Zarr -> append zarr : {total_time} seconds")
size = folder_size(result_path)
file_sizes.append(size * 10**-9) # converts bytes to GB
bandwidths.append((size * 10**-9) / total_time) # GB/s
shutil.rmtree(result_path)

return file_sizes, bandwidths

def continuous_append_test(self, append_dim_size: int) -> None:
print("\n\n--------Ome-Zarr Append Stress Test--------\n\n")
file_sizes, bandwidths = self.__continuous_append(
result_path = self.abs_path_to_data + "/stressTestAppend.zarr",
append_dim_size = append_dim_size
)
print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="ome-zarr append")


Loading

0 comments on commit 25c77df

Please sign in to comment.