Skip to content

Commit

Permalink
Autofix formatting issues with benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
benchislett committed Jan 23, 2021
1 parent 25f9783 commit 81a302f
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 37 deletions.
4 changes: 3 additions & 1 deletion benchmarks/benchmark_compress_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@

REPEAT_TIMES = 100


def bench_pil_compression(times=REPEAT_TIMES):
    """Encode the module-level ``IMG`` as PNG ``times`` times under one timer.

    The whole loop runs inside a single ``Timer("PIL compression")`` context.
    Every iteration saves into a brand-new ``BytesIO`` and the encoded bytes
    are discarded afterwards.

    Args:
        times: Number of PNG encodes performed inside the timed region.
    """
    with Timer("PIL compression"):
        for _ in range(times):
            # Fresh in-memory sink per encode; the output itself is not kept.
            IMG.save(BytesIO(), format="png")


def bench_hub_compression(times=REPEAT_TIMES):
arr = np.array(IMG)
ds = hub.Dataset(
Expand All @@ -33,7 +35,7 @@ def bench_hub_compression(times=REPEAT_TIMES):
with Timer("Hub compression"):
ds["image", :times] = batch


if __name__ == "__main__":
    # Run each benchmark once with its default repeat count, PIL first.
    for run_benchmark in (bench_pil_compression, bench_hub_compression):
        run_benchmark()

51 changes: 23 additions & 28 deletions benchmarks/benchmark_dataset_iter.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,54 @@
import time
import torch
import tensorflow as tf

from hub import Dataset
from hub.utils import Timer
from hub.schema.features import (
Primitive,
Tensor,
SchemaDict,
HubSchema,
featurify,
)

DATASET_NAMES = ['activeloop/mnist', 'activeloop/cifar10_train']
DATASET_NAMES = ["activeloop/mnist", "activeloop/cifar10_train"]

BATCH_SIZES = [1, 16, 128]

PREFETCH_SIZES = [1, 4, 16, 128]

def time_iter_pytorch(dataset_name="activeloop/mnist",
batch_size=1,
prefetch_factor=0,
process=None):

dset = Dataset(dataset_name, cache=False, storage_cache=False, mode='r')
def time_iter_pytorch(
dataset_name="activeloop/mnist", batch_size=1, prefetch_factor=0, process=None
):

loader = torch.utils.data.DataLoader(
dset.to_pytorch(),
batch_size=batch_size,
prefetch_factor=prefetch_factor,
num_workers=1
)
dset = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")

with Timer(f"{dataset_name} PyTorch prefetch {prefetch_factor:03} in batches of {batch_size:03}"):
loader = torch.utils.data.DataLoader(
dset.to_pytorch(),
batch_size=batch_size,
prefetch_factor=prefetch_factor,
num_workers=1,
)

with Timer(
f"{dataset_name} PyTorch prefetch {prefetch_factor:03} in batches of {batch_size:03}"
):
for idx, (image, label) in enumerate(loader):
if process is not None:
process(idx, image, label)


def time_iter_tensorflow(dataset_name="activeloop/mnist",
batch_size=1,
prefetch_factor=0,
process=None):
def time_iter_tensorflow(
dataset_name="activeloop/mnist", batch_size=1, prefetch_factor=0, process=None
):

dset = Dataset(dataset_name, cache=False, storage_cache=False, mode='r')
dset = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")

loader = dset.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)

with Timer(f"{dataset_name} TF prefetch {prefetch_factor:03} in batches of {batch_size:03}"):
with Timer(
f"{dataset_name} TF prefetch {prefetch_factor:03} in batches of {batch_size:03}"
):
for idx, batch in enumerate(loader):
image = batch["image"]
label = batch["label"]
if process is not None:
process(idx, image, label)


if __name__ == "__main__":
for name in DATASET_NAMES:
for size in BATCH_SIZES:
Expand Down
22 changes: 14 additions & 8 deletions benchmarks/benchmark_random_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,27 @@
from hub import Dataset
from hub.utils import Timer

DATASET_NAMES = ['activeloop/mnist',
'activeloop/omniglot_test',
'activeloop/cifar10_train',
'activeloop/cifar100_train']
DATASET_NAMES = [
"activeloop/mnist",
"activeloop/omniglot_test",
"activeloop/cifar10_train",
"activeloop/cifar100_train",
]

SPAN_POWER_MAX = 10

def time_random_access(dataset_name="activeloop/mnist", offset=1000, span=1000, field="image"):

def time_random_access(
dataset_name="activeloop/mnist", offset=1000, span=1000, field="image"
):
dset = Dataset(dataset_name, cache=False, storage_cache=False)
with Timer(f"{dataset_name} read at offset {offset:03} of length {span:03}"):
dset[field][offset:offset+span].compute()
dset[field][offset : offset + span].compute()


if __name__ == "__main__":
for name in DATASET_NAMES:
for span in range(SPAN_POWER_MAX):
offset = randint(0,999)
time_random_access(name, offset, 2**span)
offset = randint(0, 999)
time_random_access(name, offset, 2 ** span)
print()

0 comments on commit 81a302f

Please sign in to comment.