Skip to content

Commit

Permalink
Merge pull request #12 from aliddell/add-acquire-again
Browse files Browse the repository at this point in the history
Add Acquire Zarr to benchmark suite
  • Loading branch information
aliddell authored Nov 14, 2024
2 parents 28db310 + 1bcde32 commit 2370075
Show file tree
Hide file tree
Showing 12 changed files with 171 additions and 32 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ __pycache__/
.vscode/
example_data/
build/
test.*
test.*
venv/
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "acquire-zarr"]
path = external/acquire-zarr
url = git@github.com:aliddell/acquire-zarr.git
54 changes: 40 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,22 +1,48 @@
cmake_minimum_required(VERSION 3.2)
project(cpp-zarr)
cmake_minimum_required(VERSION 3.23)
project(zarr-benchmarks)
cmake_policy(SET CMP0057 NEW)
cmake_policy(SET CMP0079 NEW)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED True)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# finding the cppZarr lib
# Common dependencies
find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
find_package(pybind11 REQUIRED)

# Dependencies for acquire-zarr
find_package(nlohmann_json CONFIG REQUIRED)
find_package(blosc CONFIG REQUIRED)
find_package(miniocpp CONFIG REQUIRED)

include_directories(
/usr/local/include
${Python3_INCLUDE_DIRS}
${pybind11_INCLUDE_DIR}
)

# construct bindings for cppZarr
find_library(cppZarrLib cppZarr REQUIRED)
pybind11_add_module(pyCppZarr zarr_libraries/cpp_zarr/cpp_zarr.cpp)
target_link_libraries(pyCppZarr PRIVATE ${cppZarrLib})

# setting include dir for dependencies
include_directories(/usr/local/include)
# build acquire-zarr
set(BUILD_PYTHON ON CACHE BOOL "Build Python bindings for acquire-zarr")
set(BUILD_TESTING OFF CACHE BOOL "Disable acquire-zarr tests")
add_subdirectory(external/acquire-zarr)

# pybind11
find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
include_directories(${Python3_INCLUDE_DIRS})
add_custom_target(install-acquire-zarr-python
COMMAND ${Python3_EXECUTABLE} -m pip install .
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/acquire-zarr
DEPENDS acquire-zarr
COMMENT "Installing acquire-zarr Python package"
)

find_package(pybind11 REQUIRED)
include_directories(${pybind11_INCLUDE_DIR})
# Make the Python installation part of the default build
add_dependencies(pyCppZarr install-acquire-zarr-python)

pybind11_add_module(pyCppZarr zarr_libraries/cpp_zarr/cpp_zarr.cpp)
target_link_libraries(pyCppZarr PRIVATE ${cppZarrLib})
# Installation rules
install(TARGETS pyCppZarr
LIBRARY DESTINATION ${Python3_SITEARCH}/zarr_benchmarks
RUNTIME DESTINATION ${Python3_SITEARCH}/zarr_benchmarks)
12 changes: 12 additions & 0 deletions CMakePresets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"version": 3,
"configurePresets": [
{
"name": "default",
"binaryDir": "${sourceDir}/build",
"cacheVariables": {
"CMAKE_TOOLCHAIN_FILE": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
}
}
]
}
29 changes: 15 additions & 14 deletions benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ def __init__(self, shape: list, chunks: list) -> None:
self.__chunks = chunks
self.__average_bandwidth = {}
self.__zarr_writers = {
"TensorStore" : Tensorstore(),
"Zarr Python" : Zarr_Python(),
"OME Zarr" : Ome_Zarr(),
"Cpp Zarr" : Cpp_Zarr()
"TensorStore" : Tensorstore(),
"Zarr Python" : Zarr_Python(),
"OME Zarr" : Ome_Zarr(),
"Cpp Zarr" : Cpp_Zarr(),
"Acquire Zarr" : AcquireZarr()
}


Expand Down Expand Up @@ -46,7 +47,7 @@ def run_write_tests(self, num_of_gigabytes: int, show_results: bool,
avg_graph: Optional[matplotlib.axes._axes.Axes] = None) -> None:

# error checking to see if chosen lib exists in test
if choose_lib != None and choose_lib not in set(self.__zarr_writers.keys()):
if choose_lib is not None and choose_lib not in set(self.__zarr_writers.keys()):
raise ValueError(f"There is no library of name \"{choose_lib}\".")

gb_in_bytes = 1073741824 # represents number of bytes in a GB
Expand Down Expand Up @@ -77,11 +78,11 @@ def run_write_tests(self, num_of_gigabytes: int, show_results: bool,
shutil.rmtree(writer.data_path)

# if a specified library is chosen for testing, skip any that isn't that test
if choose_lib != None and choose_lib != lib_name:
if choose_lib is not None and choose_lib != lib_name:
continue

# store time taken to write zarr
if lib_name == "TensorStore" or lib_name == "Zarr Python":
if lib_name in ("TensorStore", "Zarr Python", "Acquire Zarr"):
total_time = writer.write_zarr(shape=new_shape, chunks=self.chunks, zarr_data=zarr_data)
elif lib_name == "OME Zarr":
total_time = writer.write_zarr(chunks=self.chunks, zarr_data=zarr_data)
Expand All @@ -102,7 +103,7 @@ def run_write_tests(self, num_of_gigabytes: int, show_results: bool,
# plot the data and clean up the folders
for lib_name, writer in self.__zarr_writers.items():
# if a specified library is chosen for testing, skip any that isn't that test
if choose_lib != None and choose_lib != lib_name:
if choose_lib is not None and choose_lib != lib_name:
continue

# cleans up data left behind
Expand All @@ -125,11 +126,11 @@ def run_append_tests(self, num_of_gigabytes: int, show_results: bool,
avg_graph: Optional[matplotlib.axes._axes.Axes] = None) -> None:

# error checking to see if chosen lib exists in test
if choose_lib != None and choose_lib not in set(self.__zarr_writers.keys()):
if choose_lib is not None and choose_lib not in set(self.__zarr_writers.keys()):
raise ValueError(f"There is no library of name \"{choose_lib}\".")

# these are the only libraries that allow for appending of data
if choose_lib != None and choose_lib != "TensorStore" and choose_lib != "Zarr Python":
if choose_lib is not None and choose_lib != "TensorStore" and choose_lib != "Zarr Python":
return

gb_in_bytes = 1073741824 # represents number of bytes in a GB
Expand Down Expand Up @@ -157,7 +158,7 @@ def run_append_tests(self, num_of_gigabytes: int, show_results: bool,

for lib_name, writer in self.__zarr_writers.items():
# if a specified library is chosen for testing, skip any that isn't that test
if choose_lib != None and choose_lib != lib_name:
if choose_lib is not None and choose_lib != lib_name:
continue

# store time taken to append data
Expand All @@ -184,7 +185,7 @@ def run_append_tests(self, num_of_gigabytes: int, show_results: bool,
continue

# if a specified library is chosen for testing, skip any that isn't that test
if choose_lib != None and choose_lib != lib_name:
if choose_lib is not None and choose_lib != lib_name:
continue

shutil.rmtree(writer.data_path)
Expand All @@ -204,7 +205,7 @@ def run_all_tests(self, append_test_gigabytes: int, write_test_gigabytes: int,
append_graph: Optional[matplotlib.axes._axes.Axes] = None, append_avg_graph: Optional[matplotlib.axes._axes.Axes] = None,
write_graph: Optional[matplotlib.axes._axes.Axes] = None, write_avg_graph: Optional[matplotlib.axes._axes.Axes] = None) -> None:

self.run_append_tests(num_of_gigabytes=append_test_gigabytes, show_results=False, choose_lib=choose_lib, graph=append_graph, avg_graph=append_avg_graph)
self.run_write_tests(num_of_gigabytes=write_test_gigabytes, show_results=False, choose_lib=choose_lib, graph=write_graph, avg_graph=write_avg_graph)
self.run_append_tests(num_of_gigabytes=append_test_gigabytes, show_results=False, choose_lib=choose_lib, graph=append_graph, avg_graph=append_avg_graph)
self.__print_results(additional_info=(f"Write Test GB Soft Cap: {write_test_gigabytes}GB | Append Test GB Soft Cap: {append_test_gigabytes}GB"))


1 change: 1 addition & 0 deletions external/acquire-zarr
Submodule acquire-zarr added at b0f23e
4 changes: 2 additions & 2 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ echo "Directory '$BUILD_DIR' has been created successfully."

# build cpp code
cd "$BUILD_DIR" || { echo "Failed to change directory to '$BUILD_DIR'"; exit 1; }
cmake .. || { echo "Failed to run cmake '$BUILD_DIR'"; exit 1; }
make || { echo "Failed to run make"; exit 1; }
cmake --preset=default .. || { echo "Failed to run cmake '$BUILD_DIR'"; exit 1; }
cmake --build . || { echo "Failed to run make"; exit 1; }

echo "Build process completed successfully."
14 changes: 14 additions & 0 deletions vcpkg-configuration.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"default-registry": {
"kind": "git",
"baseline": "f7423ee180c4b7f40d43402c2feb3859161ef625",
"repository": "https://github.com/microsoft/vcpkg"
},
"registries": [
{
"kind": "artifact",
"location": "https://github.com/microsoft/vcpkg-ce-catalog/archive/refs/heads/main.zip",
"name": "microsoft"
}
]
}
16 changes: 16 additions & 0 deletions vcpkg.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"dependencies": [
{
"name": "blosc",
"version>=": "1.21.5"
},
{
"name": "nlohmann-json",
"version>=": "3.11.3"
},
{
"name": "minio-cpp",
"version>=": "0.3.0"
}
]
}
3 changes: 2 additions & 1 deletion zarr_libraries/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
from zarr_libraries.tensorstore.tensorstore_zarr import *
from zarr_libraries.zarr_python.zarr_python import *
from zarr_libraries.ome_ngff.ome_zarr import *
from zarr_libraries.cpp_zarr.cpp_zarr import *
from zarr_libraries.cpp_zarr.cpp_zarr import *
from zarr_libraries.acquire_zarr import AcquireZarr
62 changes: 62 additions & 0 deletions zarr_libraries/acquire_zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import acquire_zarr as aqz

from pathlib import Path
import numpy as np
import time


class AcquireZarr:
    """Benchmark writer that streams a NumPy array into a Zarr store via ``acquire_zarr``.

    The store location is fixed at construction time to
    ``<directory of this file>/example_data/acquire_zarr_data/test.zarr``.
    """

    def __init__(self) -> None:
        """Set the acquire-zarr log level and make sure the store's parent directory exists."""
        # Restrict acquire-zarr logging to the Error level.
        aqz.set_log_level(aqz.LogLevel.Error)

        base_dir = Path(__file__).parent
        self.__path_to_data = (
            base_dir / "example_data" / "acquire_zarr_data" / "test.zarr"
        ).resolve()

        # exist_ok=True makes this idempotent and avoids the race between an
        # explicit exists() check and the subsequent mkdir().
        self.__path_to_data.parent.mkdir(parents=True, exist_ok=True)

    @property
    def data_path(self) -> str:
        """Absolute path of the Zarr store, as a string."""
        return str(self.__path_to_data)

    def write_zarr(self, shape: list, chunks: list, zarr_data: np.ndarray) -> float:
        """Write ``zarr_data`` to the store and return the elapsed time in seconds.

        Args:
            shape: Array extent per dimension; indices 0, 1, 2 are used for the
                "t", "y" and "x" dimensions respectively.
            chunks: Chunk extent per dimension, indexed the same way as ``shape``.
            zarr_data: Data handed to ``ZarrStream.append``. The stream is
                configured with ``DataType.UINT8``.

        Returns:
            Seconds spent appending the data and releasing the stream, measured
            with ``time.perf_counter``. Building the settings and opening the
            stream are not included in the measurement.
        """
        settings = aqz.StreamSettings()
        settings.store_path = self.data_path
        settings.compression = aqz.CompressionSettings(
            compressor=aqz.Compressor.BLOSC1,
            codec=aqz.CompressionCodec.BLOSC_LZ4,
            level=1,
            shuffle=0,
        )
        settings.data_type = aqz.DataType.UINT8

        # (dimension name, dimension kind) in array-axis order: t, y, x.
        axes = (
            ("t", aqz.DimensionType.TIME),
            ("y", aqz.DimensionType.SPACE),
            ("x", aqz.DimensionType.SPACE),
        )
        settings.dimensions.extend(
            [
                aqz.Dimension(
                    name=axis_name,
                    kind=axis_kind,
                    array_size_px=shape[axis],
                    chunk_size_px=chunks[axis],
                )
                for axis, (axis_name, axis_kind) in enumerate(axes)
            ]
        )
        settings.version = aqz.ZarrVersion.V2
        stream = aqz.ZarrStream(settings)

        t = time.perf_counter()
        stream.append(zarr_data)
        # Release the stream inside the timed region so that any work performed
        # when the stream is torn down is part of the measurement.
        # NOTE(review): assumes dropping the last reference finalizes the stream
        # (flushes pending data to disk) - confirm against the acquire-zarr bindings.
        del stream
        total_time = time.perf_counter() - t

        return total_time
2 changes: 2 additions & 0 deletions zarr_libraries/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

# getting size of zarr folder recursively
def folder_size_in_bytes(folder: str) -> int:
if not os.path.exists(folder):
return 0
total_size = os.path.getsize(folder)

for item in os.listdir(folder):
Expand Down

0 comments on commit 2370075

Please sign in to comment.