Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Acquire-Zarr to benchmark suite #11

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ __pycache__/
.vscode/
example_data/
build/
test.*
test.*
.idea/
cmake-build-*
11 changes: 8 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
cmake_minimum_required(VERSION 3.2)
project(cpp-zarr)
cmake_minimum_required(VERSION 3.3)
project(zarr-writers-benchmark)
cmake_policy(SET CMP0057 NEW)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED True)

include(cmake/acquire-zarr.cmake)

# finding the cppZarr lib
find_library(cppZarrLib cppZarr REQUIRED)

Expand All @@ -19,4 +21,7 @@ find_package(pybind11 REQUIRED)
include_directories(${pybind11_INCLUDE_DIR})

pybind11_add_module(pyCppZarr zarr_libraries/cpp_zarr/cpp_zarr.cpp)
target_link_libraries(pyCppZarr PRIVATE ${cppZarrLib})
target_link_libraries(pyCppZarr PRIVATE ${cppZarrLib})

pybind11_add_module(pyAcquireZarr zarr_libraries/acquire_zarr/acquire-zarr.cpp)
target_link_libraries(pyAcquireZarr PRIVATE acquire-zarr)
74 changes: 44 additions & 30 deletions benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,30 @@ def __init__(self, shape: list, chunks: list) -> None:
"TensorStore" : Tensorstore(),
"Zarr Python" : Zarr_Python(),
"OME Zarr" : Ome_Zarr(),
"Cpp Zarr" : Cpp_Zarr()
"Cpp Zarr" : Cpp_Zarr(),
"Acquire Zarr": Acquire_Zarr()
}


''' These functions are intended to be "private" and for use only inside the class '''
def __set_write_functions(self, shape: list, zarr_data: np.ndarray) -> None:
    """Build the table of zero-argument write callables, keyed by library name.

    Every entry defers its work until it is called: the writer object and
    ``self.chunks`` are looked up at call time, while ``shape`` and
    ``zarr_data`` are the values handed to this method.

    Args:
        shape: Array shape forwarded to writers that accept a ``shape`` keyword.
        zarr_data: Array forwarded to writers that accept a ``zarr_data`` keyword.
    """

    def tensorstore_write():
        return self.__zarr_writers["TensorStore"].write_zarr(shape=shape, chunks=self.chunks, zarr_data=zarr_data)

    def zarr_python_write():
        return self.__zarr_writers["Zarr Python"].write_zarr(shape=shape, chunks=self.chunks, zarr_data=zarr_data)

    def ome_zarr_write():
        # This writer is called without a shape keyword.
        return self.__zarr_writers["OME Zarr"].write_zarr(chunks=self.chunks, zarr_data=zarr_data)

    def cpp_zarr_write():
        # This writer is called without zarr_data.
        return self.__zarr_writers["Cpp Zarr"].write_zarr(shape=shape, chunks=self.chunks)

    def acquire_zarr_write():
        # This writer is called without zarr_data.
        return self.__zarr_writers["Acquire Zarr"].write_zarr(shape=shape, chunks=self.chunks)

    self.__write_zarr = {
        "TensorStore": tensorstore_write,
        "Zarr Python": zarr_python_write,
        "OME Zarr": ome_zarr_write,
        "Cpp Zarr": cpp_zarr_write,
        "Acquire Zarr": acquire_zarr_write,
    }


def __set_append_functions(self, new_shape: list, zarr_data: np.ndarray, multiplier: int) -> None:
    """Build the table of zero-argument append callables, keyed by library name.

    Only three libraries get an entry. Each callable looks up its writer,
    ``self.shape`` and ``self.chunks`` when it is invoked, not when this
    method runs.

    Args:
        new_shape: Forwarded only to the TensorStore writer.
        zarr_data: Array forwarded to every writer's ``append_zarr``.
        multiplier: Forwarded only to the TensorStore writer.
    """

    def tensorstore_append():
        return self.__zarr_writers["TensorStore"].append_zarr(
            shape=self.shape,
            chunks=self.chunks,
            new_shape=new_shape,
            zarr_data=zarr_data,
            multiplier=multiplier,
        )

    def zarr_python_append():
        return self.__zarr_writers["Zarr Python"].append_zarr(
            shape=self.shape, chunks=self.chunks, zarr_data=zarr_data
        )

    def acquire_zarr_append():
        return self.__zarr_writers["Acquire Zarr"].append_zarr(
            shape=self.shape, chunks=self.chunks, zarr_data=zarr_data
        )

    self.__append_zarr = {
        "TensorStore": tensorstore_append,
        "Zarr Python": zarr_python_append,
        "Acquire Zarr": acquire_zarr_append,
    }


''' These functions are intended to be "private" and for use only inside the class '''
def __print_results(self, additional_info: Optional[str] = None):
if additional_info: print(additional_info)

Expand Down Expand Up @@ -85,7 +104,7 @@ def run_write_tests(self, num_of_gigabytes: int, show_results: bool,
total_time = writer.write_zarr(shape=new_shape, chunks=self.chunks, zarr_data=zarr_data)
elif lib_name == "OME Zarr":
total_time = writer.write_zarr(chunks=self.chunks, zarr_data=zarr_data)
elif lib_name == "Cpp Zarr":
elif lib_name == "Cpp Zarr" or lib_name == "Acquire Zarr":
total_time = writer.write_zarr(shape=new_shape, chunks=self.chunks)

# prints info to the terminal
Expand Down Expand Up @@ -134,38 +153,33 @@ def run_append_tests(self, num_of_gigabytes: int, show_results: bool,

gb_in_bytes = 1073741824 # represents number of bytes in a GB
write_size = np.prod(self.shape) # amount of bytes appended on in each function call
multiplier = 1 # multiplier that increases shape of zarr folder written
curr_data_size = 0 # test will run until curr_data_size reaches specified GB size passed into the function
write_speeds = defaultdict(list) # dict that holds the write speeds for every lib tested
write_numbers = [] # keeps track of writes in list for graphing purposes

print(f"\n\n--------Append Stress Test--------\n\n")

while curr_data_size < (num_of_gigabytes * gb_in_bytes):
# modify the append dimension, unpack the rest
new_shape = [self.shape[0] * (multiplier), *self.shape[1:]]
for lib_name, writer in self.__zarr_writers.items():
# these are the only libraries that allow for appending of data
if not lib_name in ("TensorStore", "Zarr Python", "Acquire Zarr"):
continue

# 3d array filled with 1 byte ints so multiplication gives accurate size in bytes
curr_data_size = np.prod(new_shape)
# if a specified library is chosen for testing, skip any that isn't that test
if choose_lib != None and choose_lib != lib_name: continue

# creating new data and adjusting the shape
zarr_data = np.random.randint(low=0, high=256, size=self.shape, dtype=np.uint8)
print(f"\n\n--------{lib_name} Append Stress Test--------\n\n")

print("--------------------------------------------------------------------")
print(f"Current shape : {new_shape} | Current multiplier {multiplier}x")
print("--------------------------------------------------------------------")
multiplier = 1 # multiplier that increases shape of zarr folder written
curr_data_size = 0 # test will run until curr_data_size reaches specified GB size passed into the function
write_speeds = []
write_numbers = []

for lib_name, writer in self.__zarr_writers.items():
# if a specified library is chosen for testing, skip any that isn't that test
if choose_lib != None and choose_lib != lib_name:
continue
while curr_data_size < (num_of_gigabytes * gb_in_bytes):
# modify the append dimension, unpack the rest
new_shape = [self.shape[0] * (multiplier), *self.shape[1:]]

# creating new data and adjusting the shape
zarr_data = np.random.randint(low=0, high=256, size=self.shape, dtype=np.uint8)

# store time taken to append data
if lib_name == "TensorStore":
total_time = writer.append_zarr(shape=self.shape, chunks=self.chunks, new_shape=new_shape, zarr_data=zarr_data, multiplier=multiplier)
elif lib_name == "Zarr Python":
total_time = writer.append_zarr(shape=self.shape, chunks=self.chunks, zarr_data=zarr_data)

# returns time taken to write zarr folder / both libraries use a different approach hence the if statements
self.__set_append_functions(new_shape=new_shape, zarr_data=zarr_data, multiplier=multiplier)
total_time = self.__append_zarr[lib_name]() # calling a lambda function inside of a dictionary

# prints info to the terminal
print(f"{lib_name} -> appending zarr : {total_time} seconds")
print(f"The zarr folder is of size {formatted_folder_size(writer.data_path)}\n\n")
Expand All @@ -180,7 +194,7 @@ def run_append_tests(self, num_of_gigabytes: int, show_results: bool,
# plot the data collected
for lib_name, writer in self.__zarr_writers.items():
# these are the only libraries that allow for appending of data
if lib_name != "TensorStore" and lib_name != "Zarr Python":
if not lib_name in ("TensorStore", "Zarr Python", "Acquire Zarr"):
continue

# if a specified library is chosen for testing, skip any that isn't that test
Expand Down
20 changes: 20 additions & 0 deletions cmake/acquire-zarr.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Locates an Acquire-Zarr installation and exposes it as the imported target
# `acquire-zarr`. Configuration aborts if the headers or the library file are
# missing.
#
# NOTE(review): NO_CACHE is only understood by newer CMake releases than the
# minimum version the top-level project declares -- confirm the intended floor.
find_path(ACQUIRE_ZARR_ROOT_DIR
        NAMES "include/zarr.types.h"
        PATHS
        $ENV{HOME}/.local
        DOC "Acquire-Zarr location"
        NO_CACHE
)

if (NOT ACQUIRE_ZARR_ROOT_DIR)
    message(FATAL_ERROR "Acquire-Zarr NOT FOUND")
endif ()

message(STATUS "Acquire-Zarr found: ${ACQUIRE_ZARR_ROOT_DIR}")

set(lib acquire-zarr)
set(ACQUIRE_ZARR_LIBRARY_FILE "${ACQUIRE_ZARR_ROOT_DIR}/lib/libacquire-zarr.so")

# Fail at configure time instead of at link time when only the headers exist.
if (NOT EXISTS "${ACQUIRE_ZARR_LIBRARY_FILE}")
    message(FATAL_ERROR "Acquire-Zarr library NOT FOUND: ${ACQUIRE_ZARR_LIBRARY_FILE}")
endif ()

# The file is a shared object, so the imported target is declared SHARED
# (it was STATIC, which does not match a .so file).
add_library(${lib} SHARED IMPORTED GLOBAL)

# Properties are set directly so no target_* command is needed on an
# IMPORTED target.
set_target_properties(${lib} PROPERTIES
        IMPORTED_LOCATION "${ACQUIRE_ZARR_LIBRARY_FILE}"
        INTERFACE_INCLUDE_DIRECTORIES "${ACQUIRE_ZARR_ROOT_DIR}/include"
)
3 changes: 2 additions & 1 deletion zarr_libraries/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
from zarr_libraries.tensorstore.tensorstore_zarr import *
from zarr_libraries.zarr_python.zarr_python import *
from zarr_libraries.ome_ngff.ome_zarr import *
from zarr_libraries.cpp_zarr.cpp_zarr import *
from zarr_libraries.cpp_zarr.cpp_zarr import *
from zarr_libraries.acquire_zarr.acquire_zarr import *
147 changes: 147 additions & 0 deletions zarr_libraries/acquire_zarr/acquire-zarr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#include "zarr.h"

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <random>
#include <stdexcept>
#include <string>
#include <vector>

namespace py = pybind11;
namespace chrono = std::chrono;

namespace inner {
std::vector<uint64_t> chunks;
std::vector<uint64_t> shape;
ZarrStream *stream = nullptr;

float append_zarr(std::string path, std::vector<uint64_t> chunks,
std::vector<uint64_t> shape, std::vector<uint8_t> data) {
auto *settings = ZarrStreamSettings_create();

ZarrStreamSettings_set_store(settings, path.c_str(), path.size() + 1,
nullptr);

ZarrStreamSettings_reserve_dimensions(settings, 3);
ZarrDimensionProperties dim = {
.name = "t",
.bytes_of_name = sizeof("t"),
.kind = ZarrDimensionType_Time,
.array_size_px = static_cast<uint32_t>(shape[0]),
.chunk_size_px = static_cast<uint32_t>(chunks[0]),
.shard_size_chunks = 0};
ZarrStreamSettings_set_dimension(settings, 0, &dim);

dim = {.name = "y",
.bytes_of_name = sizeof("y"),
.kind = ZarrDimensionType_Space,
.array_size_px = static_cast<uint32_t>(shape[1]),
.chunk_size_px = static_cast<uint32_t>(chunks[1]),
.shard_size_chunks = 0};
ZarrStreamSettings_set_dimension(settings, 1, &dim);

dim = {.name = "x",
.bytes_of_name = sizeof("x"),
.kind = ZarrDimensionType_Space,
.array_size_px = static_cast<uint32_t>(shape[2]),
.chunk_size_px = static_cast<uint32_t>(chunks[2]),
.shard_size_chunks = 0};
ZarrStreamSettings_set_dimension(settings, 2, &dim);

auto *stream = ZarrStream_create(settings, ZarrVersion_2);

auto begin_time = chrono::high_resolution_clock::now();
size_t bytes_out;
ZarrStream_append(stream, data.data(), data.size(), &bytes_out);
chrono::duration<float, std::ratio<1>> duration =
chrono::high_resolution_clock::now() - begin_time;

ZarrStreamSettings_destroy(settings);

return duration.count();
}
} // namespace inner

/**
 * @brief Drops the cached stream and records a new chunk/shape configuration.
 *
 * After this call `inner::stream` is null and `inner::chunks` /
 * `inner::shape` hold copies of the arguments.
 *
 * @param chunks Chunk sizes to remember.
 * @param shape  Array shape to remember.
 */
void reset_stream(const std::vector<uint64_t> &chunks,
                  const std::vector<uint64_t> &shape) {
  // Only hand a live handle to the C API; on the very first call there is
  // nothing to destroy.
  if (inner::stream != nullptr) {
    ZarrStream_destroy(inner::stream);
    inner::stream = nullptr;
  }
  inner::chunks = chunks;
  inner::shape = shape;
}

float append_zarr(std::string path, std::vector<uint64_t> chunks,
std::vector<uint64_t> shape, py::array_t<uint8_t> data) {
if (inner::chunks.empty() || inner::shape.empty() ||
inner::chunks.size() != chunks.size() ||
inner::shape.size() != shape.size()) {
reset_stream(chunks, shape);
} else {
for (auto i = 0; i < chunks.size(); i++) {
if (chunks[i] != inner::chunks[i] || shape[i] != inner::shape[i]) {
reset_stream(chunks, shape);
break;
}
}
}

if (!inner::stream) {
auto *settings = ZarrStreamSettings_create();

ZarrStreamSettings_set_store(settings, path.c_str(), path.size() + 1,
nullptr);

ZarrStreamSettings_reserve_dimensions(settings, 3);
ZarrDimensionProperties dim = {
.name = "t",
.bytes_of_name = sizeof("t"),
.kind = ZarrDimensionType_Time,
.array_size_px = static_cast<uint32_t>(shape[0]),
.chunk_size_px = static_cast<uint32_t>(chunks[0]),
.shard_size_chunks = 0};
ZarrStreamSettings_set_dimension(settings, 0, &dim);

dim = {.name = "y",
.bytes_of_name = sizeof("y"),
.kind = ZarrDimensionType_Space,
.array_size_px = static_cast<uint32_t>(shape[1]),
.chunk_size_px = static_cast<uint32_t>(chunks[1]),
.shard_size_chunks = 0};
ZarrStreamSettings_set_dimension(settings, 1, &dim);

dim = {.name = "x",
.bytes_of_name = sizeof("x"),
.kind = ZarrDimensionType_Space,
.array_size_px = static_cast<uint32_t>(shape[2]),
.chunk_size_px = static_cast<uint32_t>(chunks[2]),
.shard_size_chunks = 0};
ZarrStreamSettings_set_dimension(settings, 2, &dim);
inner::stream = ZarrStream_create(settings, ZarrVersion_2);

ZarrStreamSettings_destroy(settings);
}

auto begin_time = chrono::high_resolution_clock::now();
size_t bytes_out;
ZarrStream_append(inner::stream, data.data(), data.size(), &bytes_out);
chrono::duration<float, std::ratio<1>> duration =
chrono::high_resolution_clock::now() - begin_time;

return duration.count();
}

/**
 * @brief Generates random bytes for a 3-D array and writes them in one go.
 *
 * The buffer holds `shape[0] * shape[1] * shape[2]` bytes and is passed to
 * inner::append_zarr, whose return value is returned unchanged.
 *
 * @param path   Store location forwarded to inner::append_zarr.
 * @param chunks Chunk sizes forwarded to inner::append_zarr.
 * @param shape  Array shape; at least 3 entries, the first three size the buffer.
 * @return The value returned by inner::append_zarr.
 * @throws std::invalid_argument if `shape` has fewer than 3 entries.
 */
float write_zarr(std::string path, std::vector<uint64_t> chunks,
                 std::vector<uint64_t> shape) {
  if (shape.size() < 3) {
    throw std::invalid_argument(
        "write_zarr: shape needs at least 3 entries (t, y, x)");
  }

  std::vector<uint8_t> data(shape[0] * shape[1] * shape[2]);

  // A local engine replaces srand()/rand(), so the process-wide C generator
  // is no longer reseeded on every call.
  std::mt19937 rng{std::random_device{}()};
  for (auto &elem : data) {
    // Keep the low 8 bits so every value is in [0, 255].
    elem = static_cast<uint8_t>(rng() & 0xFFu);
  }

  return inner::append_zarr(path, chunks, shape, data);
}

// Python extension entry point: module `pyAcquireZarr` exposes the two
// file-scope functions under their C++ names.
PYBIND11_MODULE(pyAcquireZarr, handle) {
  handle.def("append_zarr", &append_zarr)
      .def("write_zarr", &write_zarr);
}
23 changes: 23 additions & 0 deletions zarr_libraries/acquire_zarr/acquire_zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from build.pyAcquireZarr import *

from pathlib import Path
import numpy as np


class Acquire_Zarr:
    """Adapter that forwards write/append requests to module-level functions.

    The target store path is fixed at construction time, relative to this
    file's directory.
    """

    def __init__(self) -> None:
        # Resolve the store location once, to an absolute path without "..".
        module_dir = Path(__file__).parent
        store = module_dir.joinpath("../example_data/acquire_zarr_data/test.zarr")
        self.__path_to_data = str(store.resolve())

    @property
    def data_path(self) -> str:
        """Absolute path of the store, as a string."""
        return self.__path_to_data

    def append_zarr(self, shape: list, chunks: list, zarr_data: np.ndarray) -> float:
        """Forward to the module-level ``append_zarr`` and return its result.

        Note the argument order of the call: path, chunks, shape, data.
        """
        target = self.data_path
        return append_zarr(target, chunks, shape, zarr_data)

    def write_zarr(self, shape: list, chunks: list) -> float:
        """Forward to the module-level ``write_zarr`` and return its result.

        Note the argument order of the call: path, chunks, shape.
        """
        target = self.data_path
        return write_zarr(target, chunks, shape)
Loading