refactored modules to use classes in order to efficiently pass in ne… #2

Merged · 1 commit · Jul 11, 2024
145 changes: 16 additions & 129 deletions main.py
@@ -1,148 +1,35 @@
from pathlib import Path
import shutil
import matplotlib.pyplot as plt
import zarr_libraries
from zarr_libraries.tensorstore import tensorstore_zarr
from zarr_libraries.acquire import acquire_zarr
from zarr_libraries.zarr_python import zarr_python
from zarr_libraries.ome_ngff import ome_zarr
from zarr_libraries import *


abs_path_to_data = str(Path(__file__).parent / "zarr_libraries/example_data/")
frame_multipliers = [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20,
25, 30, 35, 40, 45, 50, 60, 70, 80, 100
]


def acquire_radialSin_test() -> None:
print("\nRadial sin Zarr data\n---------------------\n")
file_sizes = []
bandwidths = []

for i, multiplier in enumerate(frame_multipliers):
if 1 < i < len(frame_multipliers) - 1: continue
acquire_zarr.setup_camera(
sim_type = "simulated: uniform random",
folder_path = abs_path_to_data + f"/acquire_data/radialSin{multiplier}.zarr",
max_frames = 64 * multiplier
)
print(f"Acquire Folder {multiplier}")
acquire_zarr.setup_dimensions(
x_array_size=1920, # set to 1920
x_chunk_size=960, # set to 960
y_array_size=1080, # set to 1080
y_chunk_size=540, # set to 540
t_array_size=0, # set to 0
t_chunk_size=64 # set to 64
)
time = acquire_zarr.create_zarr()
size = zarr_libraries.folder_size(folder_path = abs_path_to_data + f"/acquire_data/radialSin{multiplier}.zarr/0")
shutil.rmtree(abs_path_to_data + f"/acquire_data/radialSin{multiplier}.zarr")
file_sizes.append(size * 10**-9)
bandwidths.append(size * 10 **-9 / time)

print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="Acquire Writes")


def tensorstore_radialSin_copy_test() -> None:
print("\nRadial sin Zarr data\n---------------------\n")

for index, multiplier in enumerate(frame_multipliers):
print(f"TensorStore Folder {multiplier}")
tensorstore_zarr.copy_zarr(
source_path = abs_path_to_data + f"/tensorstore_data/radialSinTs{multiplier}.zarr",
result_path = abs_path_to_data + f"/tensorstore_data/radialSinTs{multiplier}.zarr"
)
zarr_libraries.folder_size(folder_path = abs_path_to_data + f"/tensorstore_data/radialSinTs{multiplier}.zarr")

print("--------------------------------------------------------------\n\n")


def tensorstore_continuous_write_test(append_dim_size: int) -> None:
print("\n\n--------Tensorstore Stress Test--------\n\n")
file_sizes, bandwidths = tensorstore_zarr.continuous_write(
result_path = abs_path_to_data + "/tensorstore_data/stressTest.zarr",
append_dim_size = append_dim_size
)
print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="tensorstore writes")


def tensorstore_continuous_append_test(append_dim_size: int) -> None:
print("\n\n--------Tensorstore Stress Test--------\n\n")
file_sizes, bandwidths = tensorstore_zarr.continuous_write_append(
result_path = abs_path_to_data + "/tensorstore_data/stressTestAppend.zarr",
append_dim_size = append_dim_size
)
print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="tensorstore append")


def zarr_python_continuous_write_test(append_dim_size: int) -> None:
print("\n\n--------Zarr-Python Stress Test--------\n\n")
file_sizes, bandwidths = zarr_python.continuous_write(
result_path = abs_path_to_data + "/zarr_python_data/stressTest.zarr",
append_dim_size = append_dim_size
)
print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="zarr-python writes")


def zarr_python_continuous_append_test(append_dim_size: int) -> None:
print("\n\n--------Zarr-Python Append Stress Test--------\n\n")
file_sizes, bandwidths = zarr_python.continuous_write_append(
result_path = abs_path_to_data + "/zarr_python_data/stressTestAppend.zarr",
append_dim_size = append_dim_size
)
print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="zarr-python append")


def ome_zarr_continuous_write_test(append_dim_size: int) -> None:
print("\n\n--------OME-Zarr Stress Test--------\n\n")
file_sizes, bandwidths = ome_zarr.continuous_write(
result_path = abs_path_to_data + "/ome_zarr_data/stressTest.zarr",
append_dim_size = append_dim_size
)
print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="ome-zarr writes")


def ome_zarr_continuous_append_test(append_dim_size: int) -> None:
print("\n\n--------Ome-Zarr Append Stress Test--------\n\n")
file_sizes, bandwidths = ome_zarr.continuous_write_append(
result_path = abs_path_to_data + "/ome_zarr_data/stressTestAppend.zarr",
append_dim_size = append_dim_size
)
print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="ome-zarr append")


if __name__ == "__main__":
def main() -> None:
zarr_python = Zarr_Python(shape=[64, 1080, 1920], chunks=[64, 540, 960])
tensorstore = Tensorstore(shape=[64, 1080, 1920], chunks=[64, 540, 960])
ome_zarr = Ome_Zarr(shape=[64, 1080, 1920], chunks=[64, 540, 960])
'''
Append Tests:
- These tests benchmark the continuous appending to a single zarr folder
- The append dimension size passed in equates to ~26 gigs of data
'''
zarr_python_continuous_append_test(append_dim_size=300)
tensorstore_continuous_append_test(append_dim_size=300)
# at around 17 gigs of data ome-zarr throws an error, append_dim_size = 12 is right before that happens
#ome_zarr_continuous_append_test(append_dim_size=12)
#zarr_python.continuous_append_test(append_dim_size=30)
#tensorstore.continuous_append_test(append_dim_size=12)
# at around 17 gigs of data ome-zarr throws an error
#ome_zarr.continuous_append_test(append_dim_size=12)

'''
Continuous write tests:
- These tests benchmark the creation of many increasingly large zarr folders
'''
#acquire_radialSin_test()
#tensorstore_radialSin_copy_test()
#tensorstore_continuous_write_test(append_dim_size=30)
#zarr_python_continuous_write_test(append_dim_size=30)
#ome_zarr_continuous_write_test(append_dim_size=30)
#tensorstore.continuous_write_test(append_dim_size=10)
#zarr_python.continuous_write_test(append_dim_size=10)
#ome_zarr.continuous_write_test(append_dim_size=10)

plt.legend()
plt.xlabel("Data Size (GB)")
plt.ylabel("Bandwidth (GBps)")
plt.show()


if __name__ == "__main__":
main()
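The refactor gives the three library wrappers an identical call surface. Below is a minimal sketch of that shared interface, assuming only the constructor and method names visible in the diff above; the Protocol and the run_append_benchmarks helper are illustrative and not part of this PR.

from typing import Protocol

class ZarrBenchmark(Protocol):
    # Shape of the interface shared by Zarr_Python, Tensorstore, and Ome_Zarr
    # after this refactor; the Protocol itself is an illustration, not code
    # from the diff.
    def continuous_write_test(self, append_dim_size: int) -> None: ...
    def continuous_append_test(self, append_dim_size: int) -> None: ...

def run_append_benchmarks(benchmarks: list[ZarrBenchmark], append_dim_size: int) -> None:
    # Hypothetical helper: each test method plots its own labelled line,
    # so main() only needs plt.legend()/plt.show() afterwards.
    for benchmark in benchmarks:
        benchmark.continuous_append_test(append_dim_size=append_dim_size)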
5 changes: 4 additions & 1 deletion zarr_libraries/__init__.py
@@ -1 +1,4 @@
from zarr_libraries.common import *
from zarr_libraries.common import *
from zarr_libraries.tensorstore.tensorstore_zarr import *
from zarr_libraries.zarr_python.zarr_python import *
from zarr_libraries.ome_ngff.ome_zarr import *
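With these re-exports in place, callers only need the package-level import. A minimal usage sketch, assuming the class names referenced in main.py above (Zarr_Python, Tensorstore, Ome_Zarr) live in the re-exported modules:

from zarr_libraries import *   # pulls in Tensorstore, Zarr_Python, Ome_Zarr

tensorstore = Tensorstore(shape=[64, 1080, 1920], chunks=[64, 540, 960])
tensorstore.continuous_write_test(append_dim_size=10)

One design note: wildcard re-exports expose every public name in those modules; defining __all__ in each module would keep the package namespace tighter.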
43 changes: 42 additions & 1 deletion zarr_libraries/acquire/acquire_zarr.py
@@ -1,6 +1,9 @@
import acquire
import time

import matplotlib.pyplot as plt
from zarr_libraries import folder_size
import shutil
from pathlib import Path

global runtime
global dm
@@ -65,3 +68,41 @@ def create_zarr() -> int:
total_time = time.perf_counter() - t
print(f"Acquire -> creating zarr : {total_time} seconds")
return total_time


def acquire_radialSin_test() -> None:
abs_path_to_data = str((Path(__file__).parent / "../example_data/acquire_data").resolve())
frame_multipliers = [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20,
25, 30, 35, 40, 45, 50, 60, 70, 80, 100
]

print("\nRadial sin Zarr data\n---------------------\n")
file_sizes = []
bandwidths = []

for i, multiplier in enumerate(frame_multipliers):
if 1 < i < len(frame_multipliers) - 1: continue
setup_camera(
sim_type = "simulated: uniform random",
folder_path = abs_path_to_data + f"/radialSin{multiplier}.zarr",
max_frames = 64 * multiplier
)
print(f"Acquire Folder {multiplier}")
setup_dimensions(
x_array_size=1920, # set to 1920
x_chunk_size=960, # set to 960
y_array_size=1080, # set to 1080
y_chunk_size=540, # set to 540
t_array_size=0, # set to 0
t_chunk_size=64 # set to 64
)
time = create_zarr()
size = folder_size(folder_path = abs_path_to_data + f"/radialSin{multiplier}.zarr/0")
shutil.rmtree(abs_path_to_data + f"/radialSin{multiplier}.zarr")
file_sizes.append(size * 10**-9)
bandwidths.append(size * 10 **-9 / time)

print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="Acquire Writes")
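Throughout these benchmarks the size/bandwidth bookkeeping uses decimal gigabytes (bytes * 10**-9), not binary GiB. A small worked example of the conversion used above (the numbers are illustrative):

size_bytes = 3_200_000_000            # as returned by folder_size()
elapsed_s = 4.0                       # as returned by create_zarr()
size_gb = size_bytes * 10**-9         # 3.2 GB (decimal)
bandwidth_gbps = size_gb / elapsed_s  # 0.8 GB/s, the y-axis of the plots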
114 changes: 74 additions & 40 deletions zarr_libraries/ome_ngff/ome_zarr.py
@@ -5,55 +5,89 @@
from zarr_libraries import folder_size
from ome_zarr.io import parse_url
from ome_zarr.writer import write_image
from pathlib import Path
import matplotlib.pyplot as plt
import sys


def continuous_write(result_path: str, append_dim_size: int) -> tuple[list, list]:
file_sizes = []
bandwidths = []
class Ome_Zarr:
def __init__(self, shape: list, chunks: list) -> None:
self.abs_path_to_data = str((Path(__file__).parent / "../example_data/ome_zarr_data").resolve())
self.shape = shape
self.chunks = chunks

for i in range(1, append_dim_size + 1):

def __continuous_write(self, result_path: str, append_dim_size: int) -> tuple[list, list]:
file_sizes = []
bandwidths = []

for i in range(1, append_dim_size + 1):
new_shape = (self.shape[0] * i, *self.shape[1:]) # modify the append dimension, unpack the rest

store = parse_url(result_path, mode="w").store
root = zarr.group(store=store)

t = time.perf_counter()
zarr_data = np.random.randint(low=0, high=256, size=new_shape, dtype=np.uint8)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=self.chunks))
total_time = time.perf_counter() - t

print(f"Write #{i}\nOME-Zarr -> creating zarr : {total_time} seconds")
size = folder_size(result_path)
file_sizes.append(size * 10**-9) # converts bytes to GB
bandwidths.append((size * 10**-9) / total_time) # GB/s
shutil.rmtree(result_path)

return file_sizes, bandwidths


def __continuous_append(self, result_path: str, append_dim_size: int) -> tuple[list, list]:
file_sizes = []
bandwidths = []
total_time = 0

store = parse_url(result_path, mode="w").store
root = zarr.group(store=store)

t = time.perf_counter()
zarr_data = np.random.randint(low=0, high=256, size=((64 * i), 1080, 1920), dtype=np.uint8)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=(64, 540, 960)))
total_time = time.perf_counter() - t

print(f"Write #{i}\nOME-Zarr -> creating zarr : {total_time} seconds")
size = folder_size(result_path)
file_sizes.append(size * 10**-9) # converts bytes to GB
bandwidths.append((size * 10**-9) / total_time) # GB/s
zarr_data = np.random.randint(low=0, high=256, size=self.shape, dtype=np.uint8)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=self.chunks))
total_time += time.perf_counter() - t
shutil.rmtree(result_path)

return file_sizes, bandwidths


def continuous_write_append(result_path: str, append_dim_size: int) -> tuple[list, list]:
file_sizes = []
bandwidths = []
total_time = 0

store = parse_url(result_path, mode="w").store
root = zarr.group(store=store)
for i in range(2, append_dim_size + 1):
t = time.perf_counter()
new_data = np.random.randint(low=0, high=256, size=self.shape, dtype=np.uint8)
zarr_data = np.concatenate((zarr_data, new_data), axis=0)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=self.chunks))
total_time += time.perf_counter() - t

print(f"Write #{i}\nOME-Zarr -> append zarr : {total_time} seconds")
size = folder_size(result_path)
file_sizes.append(size * 10**-9) # converts bytes to GB
bandwidths.append((size * 10**-9) / total_time) # GB/s
shutil.rmtree(result_path)

return file_sizes, bandwidths

t = time.perf_counter()
zarr_data = np.random.randint(low=0, high=256, size=(64, 1080, 1920), dtype=np.uint8)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=(64, 540, 960)))
total_time += time.perf_counter() - t
shutil.rmtree(result_path)

for i in range(2, append_dim_size + 1):
t = time.perf_counter()
new_data = np.random.randint(low=0, high=256, size=((64 * i), 1080, 1920), dtype=np.uint8)
zarr_data = np.concatenate((zarr_data, new_data), axis=0)
write_image(image=zarr_data, group=root, axes="tyx", storage_options=dict(chunks=(64, 540, 960)))
total_time += time.perf_counter() - t
def continuous_write_test(self, append_dim_size: int) -> None:
print("\n\n--------OME-Zarr Stress Test--------\n\n")
file_sizes, bandwidths = self.__continuous_write(
result_path = self.abs_path_to_data + "/stressTest.zarr",
append_dim_size = append_dim_size
)
print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="ome-zarr writes")

print(f"Write #{i}\nOME-Zarr -> append zarr : {total_time} seconds")
size = folder_size(result_path)
file_sizes.append(size * 10**-9) # converts bytes to GB
bandwidths.append((size * 10**-9) / total_time) # GB/s
shutil.rmtree(result_path)

return file_sizes, bandwidths

def continuous_append_test(self, append_dim_size: int) -> None:
print("\n\n--------Ome-Zarr Append Stress Test--------\n\n")
file_sizes, bandwidths = self.__continuous_append(
result_path = self.abs_path_to_data + "/stressTestAppend.zarr",
append_dim_size = append_dim_size
)
print("--------------------------------------------------------------\n\n")
plt.plot(file_sizes, bandwidths, label="ome-zarr append")
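For completeness, a driver sketch mirroring the (commented-out) calls in main.py; the append_dim_size values come from the notes there, which report ome-zarr erroring out around ~17 GB of data:

import matplotlib.pyplot as plt
from zarr_libraries import Ome_Zarr

ome_zarr = Ome_Zarr(shape=[64, 1080, 1920], chunks=[64, 540, 960])
ome_zarr.continuous_write_test(append_dim_size=10)
#ome_zarr.continuous_append_test(append_dim_size=12)  # last size before the ~17 GB error noted in main.py

plt.legend()
plt.xlabel("Data Size (GB)")
plt.ylabel("Bandwidth (GBps)")
plt.show()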

