Commit 07f51b9: Initial commit
Edward Grigoryan committed Aug 7, 2020
1 parent 23b8440
Showing 5 changed files with 110 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,5 +1,5 @@
 FROM python:3.6
 ADD ./ /workspace
 WORKDIR /workspace
-RUN pip install -r requirements_dev.txt
+RUN pip install -r requirements-dev.txt
 RUN pip install -e /workspace
44 changes: 44 additions & 0 deletions examples/to_tensorflow.py
@@ -0,0 +1,44 @@
import numpy as np

import hub
import tensorflow as tf
import torch
from torch.utils.data import Dataset, DataLoader


def main():
    tf.enable_eager_execution()
    t1 = hub.tensor.from_array(np.array([1, 2, 3]))
    t2 = hub.tensor.from_array(np.array([4, 5, 6]))
    ds = hub.dataset.from_tensors({"first": t1, "second": t2})
    # Convert to a tf.data.Dataset and iterate eagerly.
    ds = ds.to_tensorflow()
    for x in ds:
        print(x)


def main2():
    tf.enable_eager_execution()
    ds = hub.load("s3://snark-hub/public/coco/coco2017")

    for i in range(len(ds)):
        item = ds[i]
        print(item)
        for key, value in item.items():
            print(key, value)
            print(key, value.compute())
        break

    # ds = hub.load("s3://snark-hub/public/cifar/cifar10")
    # ds = hub.load("s3://snark-hub/public/mnist/mnist")
    # ds = ds.to_tensorflow()
    ds = ds.to_pytorch()
    # Wrap the TorchDataset in a DataLoader, using the collate_fn it provides.
    ds = DataLoader(ds, num_workers=0, collate_fn=ds.collate_fn, batch_size=10)
    for x in ds:
        sample = x[0]
        print(sample.keys())
        print(sample["id"])
        break


if __name__ == "__main__":
    main2()
62 changes: 60 additions & 2 deletions hub/collections/dataset/core.py
@@ -9,6 +9,12 @@
 import numpy as np
 import psutil
 
+try:
+    import tensorflow as tf
+    import tensorflow_datasets as tfds
+except ImportError:
+    pass
+
 try:
     import torch
 except ImportError:
@@ -200,7 +206,8 @@ def keys(self):
     def items(self):
         """ Returns tensors
         """
-        yield from self._tensors.items()
+        for key, value in self._tensors.items():
+            yield (key, value)
 
     def __getitem__(self, slices) -> "Dataset":
         """ Returns a slice of dataset
@@ -457,6 +464,27 @@ def store(self, tag, creds=None, session_creds=True) -> "Dataset":
     def to_pytorch(self, transform=None):
         return TorchDataset(self, transform)
 
+    def to_tensorflow(self):
+        def tf_gen():
+            for i in range(len(self)):
+                sample = self[i : i + 1]
+                yield {key: value.compute()[0] for key, value in sample.items()}
+
+        def tf_dtype(np_dtype):
+            print(np_dtype)
+            try:
+                return tf.dtypes.as_dtype(np_dtype)
+            except Exception:
+                return tf.variant
+
+        return tf.data.Dataset.from_generator(
+            tf_gen,
+            output_types={
+                key: tf_dtype(self._tensors[key].dtype) for key in self.keys()
+            },
+            output_shapes={key: self._tensors[key].shape[1:] for key in self.keys()},
+        )
+
 
 def _numpy_load(fs: fsspec.AbstractFileSystem, filepath: str) -> np.ndarray:
     """ Given filesystem and filepath, loads numpy array
@@ -557,7 +585,7 @@ def __len__(self):
 
     def __getitem__(self, index):
         return self._do_transform(
-            {key: value.compute() for key, value in self._ds[index].items()}
+            {key: value.compute()[0] for key, value in self._ds[index].items()}
         )
 
     def __iter__(self):
@@ -581,3 +609,33 @@ def _dask_concat(arr):
         return arr[0]
     else:
         return dask.array.concatenate(arr)
+
+
+# class TensorflowDataset(tfds.core.GeneratorBasedBuilder):
+#     def _info(self):
+#         return tfds.core.DatasetInfo(
+#             builder=self,
+#             # This is the description that will appear on the datasets page.
+#             description=(
+#                 "This is the dataset for xxx. It contains yyy. The "
+#                 "images are kept at their original dimensions."
+#             ),
+#             # tfds.features.FeatureConnectors
+#             # features=tfds.features.FeaturesDict(
+#             #     {
+#             #         "image_description": tfds.features.Text(),
+#             #         "image": tfds.features.Image(),
+#             #         # Here, labels can be of 5 distinct values.
+#             #         "label": tfds.features.ClassLabel(num_classes=5),
+#             #     }
+#             # ),
+#             # If there's a common (input, target) tuple from the features,
+#             # specify them here. They'll be used if as_supervised=True in
+#             # builder.as_dataset.
+#             # supervised_keys=("image", "label"),
+#             # Homepage of the dataset for documentation
+#             homepage="https://dataset-homepage.org",
+#             # Bibtex citation for the dataset
+#             citation=r"""@article{my-awesome-dataset-2020,
+#             author = {Smith, John},"}""",
+#         )
5 changes: 5 additions & 0 deletions requirements-dev.txt
@@ -0,0 +1,5 @@
pytest
tensorflow>=1.14,<2
tensorflow-datasets>=3.0,<4
torch>=1,<2
waymo-open-dataset-tf-1-15-0
4 changes: 0 additions & 4 deletions requirements_dev.txt

This file was deleted.
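For context on how the pieces fit together, here is a minimal usage sketch of the new to_tensorflow() conversion, built only from API calls that appear in this commit; the trailing .batch(2) is ordinary tf.data post-processing and an assumption of this sketch, not something the commit itself exercises:

import numpy as np
import tensorflow as tf

import hub

tf.enable_eager_execution()  # TF 1.x style, matching the tensorflow>=1.14,<2 pin

# Build a small in-memory dataset, as in examples/to_tensorflow.py.
t1 = hub.tensor.from_array(np.array([1, 2, 3]))
t2 = hub.tensor.from_array(np.array([4, 5, 6]))
ds = hub.dataset.from_tensors({"first": t1, "second": t2})

# to_tensorflow() wraps the dataset in tf.data.Dataset.from_generator and
# yields one {key: value} dict per sample, so standard tf.data
# transformations compose with it.
tf_ds = ds.to_tensorflow().batch(2)  # .batch() is an assumption, not part of the commit
for batch in tf_ds:
    print(batch["first"], batch["second"])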
