Skip to content

Commit

Permalink
png compression uses 4 times smaller chunks now
Browse files Browse the repository at this point in the history
  • Loading branch information
Edward Grigoryan committed Dec 15, 2020
1 parent b7b2db6 commit 686cd31
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 5 deletions.
2 changes: 1 addition & 1 deletion benchmarks/bench_png_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


img_path = "./benchmarks/sample.png"
count = 12
count = 20


def bench_pil_compression(img_path=img_path, count=count):
Expand Down
1 change: 1 addition & 0 deletions hub/defaults.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Default target chunk size (2 ** 24 == 16_777_216); ShapeDetector takes it as
# the default value of its `chunksize` argument.
# NOTE(review): presumably measured in bytes -- confirm against _get_chunks.
CHUNK_DEFAULT_SIZE = 2 ** 24
# Default value of ShapeDetector's `object_chunking` argument.
OBJECT_CHUNK = 128
# Compressor name used when a caller does not pass `compressor` explicitly.
DEFAULT_COMPRESSOR = "default"
7 changes: 5 additions & 2 deletions hub/store/dynamic_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from hub.store.nested_store import NestedStore
from hub.store.shape_detector import ShapeDetector
from hub.defaults import DEFAULT_COMPRESSOR

from hub.exceptions import (
DynamicTensorNotFoundException,
Expand Down Expand Up @@ -42,7 +43,7 @@ def __init__(
max_shape=None,
dtype="float64",
chunks=None,
compressor="default",
compressor=DEFAULT_COMPRESSOR,
):
"""Constructor
Parameters
Expand All @@ -64,7 +65,9 @@ def __init__(
"""
if not (shape is None):
# otherwise shape detector fails
shapeDt = ShapeDetector(shape, max_shape, chunks, dtype)
shapeDt = ShapeDetector(
shape, max_shape, chunks, dtype, compressor=compressor
)
shape = shapeDt.shape
max_shape = shapeDt.max_shape
chunks = shapeDt.chunks
Expand Down
13 changes: 11 additions & 2 deletions hub/store/shape_detector.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from hub.numcodecs import PngCodec
import math

import numpy as np

from hub.defaults import CHUNK_DEFAULT_SIZE, OBJECT_CHUNK
from hub.defaults import CHUNK_DEFAULT_SIZE, OBJECT_CHUNK, DEFAULT_COMPRESSOR
from hub.exceptions import HubException


Expand All @@ -17,19 +18,27 @@ def __init__(
dtype="float64",
chunksize=CHUNK_DEFAULT_SIZE,
object_chunking=OBJECT_CHUNK,
compressor=DEFAULT_COMPRESSOR,
):
self._int32max = np.iinfo(np.dtype("int32")).max

self._dtype = dtype = np.dtype(dtype)
self._chunksize = chunksize
self._object_chunking = object_chunking
self._compressor = compressor

self._chunksize = chunksize = self._get_chunksize(chunksize, compressor)
self._shape = shape = self._get_shape(shape)
self._max_shape = max_shape = self._get_max_shape(shape, max_shape)
self._chunks = chunks = self._get_chunks(
shape, max_shape, chunks, dtype, chunksize
)

def _get_chunksize(self, chunksize, compressor):
if isinstance(compressor, PngCodec):
return int(math.ceil(0.25 * chunksize))
else:
return chunksize

def _get_shape(self, shape):
assert shape is not None
shape = (shape,) if isinstance(shape, int) else tuple(shape)
Expand Down
6 changes: 6 additions & 0 deletions hub/store/tests/test_shape_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ def test_shape_detector():
assert s.chunks[1:] == (10, 10)


def test_shape_detector_2():
    """ShapeDetector built with compressor="png" keeps dtype and trailing chunks.

    The second positional argument (10) is passed as ``max_shape``; ``dtype``
    is left at its default.
    """
    detector = ShapeDetector((10, 10, 10), 10, compressor="png")

    dtype_name = str(detector.dtype)
    assert dtype_name == "float64"

    # Only the non-leading chunk dimensions are checked.
    trailing_chunks = detector.chunks[1:]
    assert trailing_chunks == (10, 10)


def test_shape_detector_wrong_shape():
try:
ShapeDetector((10, 10, 10), (10, 10, 20))
Expand Down

0 comments on commit 686cd31

Please sign in to comment.