Skip to content

Commit

Permalink
Merge branch 'master' into LabelsUpdate
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbuniat authored Dec 30, 2020
2 parents 5c7ccde + 6f34b14 commit 32ae235
Show file tree
Hide file tree
Showing 43 changed files with 3,342 additions and 497 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/feature_request.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Feature request
about: Suggest an idea for this project
title: "[FEATURE]"
labels: "i: enhancement, i: needs triage"
assignees: ""
assignees: ''

---

Expand Down
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -185,4 +185,8 @@ logs/
.creds/
.idea/
waymo/
output/
output/
cov.xml
hub/api/cov.xml
hub/api/nested_seq
nested_seq
16 changes: 12 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<p align="center">
<br>
<img src="https://raw.githubusercontent.com/activeloopai/Hub/master/docs/logo/hub_logo_explainer.png" width="50%"/>
<img src="https://raw.githubusercontent.com/activeloopai/Hub/master/docs/logo/logo-explainer-bg.png" width="50%"/>
</br>
</p>

Expand Down Expand Up @@ -164,9 +164,17 @@ As always, thanks to our amazing contributors! </br>


## Examples
Activeloop’s Hub format lets you achieve faster inference at a lower cost. Test out the datasets we’ve converted into Hub format - see for yourself!
- [Waymo Open Dataset](https://medium.com/snarkhub/extending-snark-hub-capabilities-to-handle-waymo-open-dataset-4dc7b7d8ab35)
- [Aptiv nuScenes](https://medium.com/snarkhub/snark-hub-is-hosting-nuscenes-dataset-for-autonomous-driving-1470ae3e1923)
Activeloop’s Hub format lets you achieve faster inference at a lower cost. We have 30+ popular datasets already on our platform. These include:
- COCO
- CIFAR-10
- PASCAL VOC
- Cars196
- KITTI
- EuroSAT
- Caltech-UCSD Birds 200
- Food101

Check out these and many more popular datasets on our [visualizer web app](https://app.activeloop.ai/datasets/popular) and load them directly for model training!


## Disclaimers
Expand Down
53 changes: 53 additions & 0 deletions benchmarks/benchmark_to_pytorch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import torchvision
import torch
import numpy as np

import hub
from hub.utils import Timer


class HubAdapter2(torch.utils.data.Dataset):
    """Map-style dataset that re-emits two-item samples as dictionaries.

    Every item of the wrapped dataset must unpack into exactly two values;
    the first is converted with ``np.array`` and returned under ``"image"``,
    the second is returned unchanged under ``"label"``.
    """

    # Upper bound on the number of samples the adapter exposes.
    MAX_SAMPLES = 1000 * 1000

    def __init__(self, ods):
        """Wrap ``ods``, a sized and indexable dataset of two-item samples."""
        self.ds = ods

    def __len__(self):
        """Return the wrapped dataset's length, capped at ``MAX_SAMPLES``."""
        return min(len(self.ds), self.MAX_SAMPLES)

    @property
    def shape(self):
        """Return ``(len(self), None, None, None)``.

        The leading entry uses the capped length so that ``shape`` agrees
        with ``__len__`` and ``__iter__``.
        """
        return (len(self), None, None, None)

    def __iter__(self):
        """Yield every exposed sample in index order."""
        for i in range(len(self)):
            yield self[i]

    def __getitem__(self, index):
        """Return sample ``index`` as ``{"image": ndarray, "label": label}``."""
        x, y = self.ds[index]
        return {"image": np.array(x), "label": y}


def test():
    """Time a CIFAR-10 round trip: torchvision -> Hub -> PyTorch DataLoader.

    Downloads CIFAR-10 into the current directory, converts it to a Hub
    dataset stored under ``./data/test/cifar/train``, reopens that dataset
    with both caches disabled, and iterates it through a ``DataLoader``
    inside ``Timer`` contexts (overall and per batch), printing a running
    sample count.
    """
    batch_size = 100
    store_path = "./data/test/cifar/train"

    tv_cifar_ds = torchvision.datasets.CIFAR10(".", download=True)
    hub_cifar = HubAdapter2(tv_cifar_ds)

    pt2hb_ds = hub.Dataset.from_pytorch(hub_cifar, scheduler="threaded", workers=8)
    pt2hb_ds.store(store_path)

    hub_s3_ds = hub.Dataset(url=store_path, cache=False, storage_cache=False)
    print(hub_s3_ds._tensors["/image"].chunks)
    hub_s3_ds = hub_s3_ds.to_pytorch()
    dl = torch.utils.data.DataLoader(
        hub_s3_ds, batch_size=batch_size, num_workers=8
    )
    with Timer("Time"):
        counter = 0
        for b in dl:
            with Timer("Batch Time"):
                # Touch both fields of the batch; the label was previously
                # never read because "image" was fetched twice.
                x, y = b["image"], b["label"]
                # Counts nominal batch sizes, so a short final batch would
                # be over-counted.
                counter += batch_size
                print(counter)


if __name__ == "__main__":
test()
Binary file added docs/logo/logo-explainer-bg.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sphinx==3.1.2
sphinx_markdown_tables==0.0.15
recommonmark==0.6.0
recommonmark==0.7.1
sphinx_rtd_theme==0.5.0
99 changes: 99 additions & 0 deletions examples/mpii_data_upload_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import json
import time
import numpy as np
from PIL import Image
from tqdm import tqdm

import hub
from hub import Dataset, schema
from hub.schema import Tensor, Text

"""
Below we define a schema for our dataset. A schema is a container
that specifies the structure, shape, dtype and meta information of
the dataset. There are different schema types for different kinds
of data, such as images, tensors and text. See the docs for more info.
"""
# Schema of one MPII sample.
# Notes inside the literal are `#` comments on purpose: a string literal
# placed in front of a key is concatenated with that key and silently
# changes it.
mpii_schema = {
    # 'shape' is None along the variable image dimensions, and the
    # 'max_shape' argument gives the maximum possible size of an image.
    "image": schema.Image(
        shape=(None, None, 3), max_shape=(1920, 1920, 3), dtype="uint8"
    ),
    "isValidation": "float64",
    "img_paths": Text(shape=(None,), max_shape=(15,)),
    "img_width": "int32",
    "img_height": "int32",
    "objpos": Tensor(max_shape=(100,), dtype="float64"),
    # 'joint_self' has a nested list structure.
    "joint_self": Tensor(shape=(None, None), max_shape=(100, 100), dtype="float64"),
    "scale_provided": "float64",
    "annolist_index": "int32",
    "people_index": "int32",
    "numOtherPeople": "int32",
}


"""
The function below takes a JSON file and returns its annotations
as a list of dictionaries.
"""


def get_anno(jsonfile):
    """Load the annotation records stored in a JSON file.

    Args:
        jsonfile: Path to a JSON file whose top-level value is an array.

    Returns:
        A new list holding the array's elements in file order.

    Raises:
        TypeError: If the top-level JSON value is not an array.
    """
    # JSON text is UTF-8 by specification, so do not rely on the locale default.
    with open(jsonfile, encoding="utf-8") as f:
        instances = json.load(f)

    if not isinstance(instances, list):
        raise TypeError(
            f"expected a JSON array of annotations in {jsonfile!r}, "
            f"got {type(instances).__name__}"
        )
    return list(instances)


"""
Hub Transform is optimized for efficient processing and storing
of a dataset. The function below takes a dataset, applies the
transform to every sample (instance) of it, and outputs a
dataset with the specified schema. See the docs for more info.
"""


@hub.transform(schema=mpii_schema, workers=8)
def mpii_transform(annotation):
    """Convert one MPII annotation record into a sample matching ``mpii_schema``.

    Args:
        annotation: Mapping that provides the keys ``img_paths``,
            ``isValidation``, ``img_width``, ``img_height``, ``objpos``,
            ``joint_self``, ``scale_provided``, ``annolist_index``,
            ``people_index`` and ``numOtherPeople``.

    Returns:
        A dict with the image pixels under ``"image"``, the raw
        ``img_paths`` value, and every other field wrapped in ``np.array``.

    Note:
        Reads the module-level ``img_path`` (the image directory), which the
        ``__main__`` section of this script assigns before the transform runs.
    """
    import os  # local import keeps this block self-contained

    # Join instead of concatenating so the directory works with or without
    # a trailing separator.
    image_file = os.path.join(img_path, annotation["img_paths"])
    # Close the file handle as soon as the pixels are copied out; otherwise
    # one handle per sample stays open.
    with Image.open(image_file) as img:
        image = np.array(img)

    return {
        "image": image,
        "isValidation": np.array(annotation["isValidation"]),
        "img_paths": annotation["img_paths"],
        "img_width": np.array(annotation["img_width"]),
        "img_height": np.array(annotation["img_height"]),
        "objpos": np.array(annotation["objpos"]),
        "joint_self": np.array(annotation["joint_self"]),
        "scale_provided": np.array(annotation["scale_provided"]),
        "annolist_index": np.array(annotation["annolist_index"]),
        "people_index": np.array(annotation["people_index"]),
        "numOtherPeople": np.array(annotation["numOtherPeople"]),
    }


if __name__ == "__main__":
    # Collect the destination tag and the input locations interactively.
    tag = input("Enter tag(username/dataset_name):")
    jsonfile = input("Enter json file path:")
    # Module-level name: mpii_transform reads it to locate the image files.
    img_path = input("Enter path to images:")

    annotations = get_anno(jsonfile)

    # Time the transform and the upload together.
    t1 = time.time()
    ds = mpii_transform(annotations).store(tag)
    elapsed = time.time() - t1
    print("Time taken to upload:", elapsed, "sec")

"""
Dataset uploaded using AWS EC2. Pipeline took 8931.26 sec to
finish. Dataset is visible on app and tested working fine.
"""
2 changes: 2 additions & 0 deletions examples/tutorial/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# A Gentle Introduction to Hub
A collection of tutorials for [Hub](https://github.com/activeloopai/hub). It starts off by working with [different types](https://docs.activeloop.ai/en/latest/concepts/features.html#available-schemas) of data (e.g., images, audio), and then moves on to more complicated concepts like dynamic tensors.
273 changes: 273 additions & 0 deletions examples/tutorial/Tutorial 1a - Uploading Images.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 32ae235

Please sign in to comment.