Commit
Merge branch 'master' into LabelsUpdate
Showing 43 changed files with 3,342 additions and 497 deletions.
@@ -185,4 +185,8 @@ logs/
 .creds/
 .idea/
 waymo/
-output/
+output/
+cov.xml
+hub/api/cov.xml
+hub/api/nested_seq
+nested_seq
@@ -0,0 +1,53 @@
import torchvision
import torch
import numpy as np

import hub
from hub.utils import Timer


class HubAdapter2(torch.utils.data.Dataset):
    """Wraps a torchvision-style dataset so each sample is a dict of named arrays."""

    def __init__(self, ods):
        self.ds = ods

    def __len__(self):
        return min(len(self.ds), 1000 * 1000)

    @property
    def shape(self):
        return (self.ds.__len__(), None, None, None)

    def __iter__(self):
        for i in range(len(self)):
            yield self[i]

    def __getitem__(self, index):
        x, y = self.ds.__getitem__(index)
        return {"image": np.array(x), "label": y}


def test():
    # Download CIFAR-10 with torchvision, wrap it, and convert it into a Hub dataset.
    tv_cifar_ds = torchvision.datasets.CIFAR10(".", download=True)

    hub_cifar = HubAdapter2(tv_cifar_ds)

    pt2hb_ds = hub.Dataset.from_pytorch(hub_cifar, scheduler="threaded", workers=8)
    res_ds = pt2hb_ds.store("./data/test/cifar/train")

    # Re-open the stored dataset, convert it back to PyTorch, and time batch loading.
    hub_s3_ds = hub.Dataset(
        url="./data/test/cifar/train", cache=False, storage_cache=False
    )
    print(hub_s3_ds._tensors["/image"].chunks)
    hub_s3_ds = hub_s3_ds.to_pytorch()
    dl = torch.utils.data.DataLoader(hub_s3_ds, batch_size=100, num_workers=8)
    with Timer("Time"):
        counter = 0
        for i, b in enumerate(dl):
            with Timer("Batch Time"):
                x, y = b["image"], b["label"]
                counter += 100
                print(counter)


if __name__ == "__main__":
    test()
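For comparison, the stored dataset can also be read back directly with Hub's own indexing, skipping the PyTorch conversion. A minimal sketch, assuming the local path used above and the Hub 1.x access pattern (hub.Dataset plus .compute()) that this file already relies on:

import hub

# Open the dataset written by the benchmark above.
ds = hub.Dataset(url="./data/test/cifar/train", cache=False)
print(ds.shape)                   # number of stored samples
img = ds["image", 0].compute()    # first image as a numpy array
label = ds["label", 0].compute()  # its label
print(img.shape, label)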
@@ -1,4 +1,4 @@
 sphinx==3.1.2
 sphinx_markdown_tables==0.0.15
-recommonmark==0.6.0
+recommonmark==0.7.1
 sphinx_rtd_theme==0.5.0
@@ -0,0 +1,99 @@
import json
import time
import numpy as np
from PIL import Image
from tqdm import tqdm

import hub
from hub import Dataset, schema
from hub.schema import Tensor, Text

"""
Below we define a schema for our dataset. A schema is a container that
specifies the structure, shape, dtype and meta information of the dataset.
There are different schema types for different kinds of data, such as
images, tensors and text. See the docs for more info.
"""
mpii_schema = {
    # 'shape' is None for variable image sizes; 'max_shape' gives the
    # maximum possible image size.
    "image": schema.Image(
        shape=(None, None, 3), max_shape=(1920, 1920, 3), dtype="uint8"
    ),
    "isValidation": "float64",
    "img_paths": Text(shape=(None,), max_shape=(15,)),
    "img_width": "int32",
    "img_height": "int32",
    "objpos": Tensor(max_shape=(100,), dtype="float64"),
    # 'joint_self' has a nested list structure.
    "joint_self": Tensor(shape=(None, None), max_shape=(100, 100), dtype="float64"),
    "scale_provided": "float64",
    "annolist_index": "int32",
    "people_index": "int32",
    "numOtherPeople": "int32",
}


"""
The function below reads a JSON file and returns the annotations as a
list of dictionaries.
"""


def get_anno(jsonfile):
    with open(jsonfile) as f:
        instances = json.load(f)

    annotations = []
    for i in range(len(instances)):
        annotations.append(instances[i])
    return annotations


"""
A Hub Transform gives efficient processing and storage of a dataset. The
function below is applied to every sample (instance) of the input and
produces a dataset with the specified schema. See the docs for more info.
"""


@hub.transform(schema=mpii_schema, workers=8)
def mpii_transform(annotation):
    # img_path is read from user input in the __main__ block below.
    return {
        "image": np.array(Image.open(img_path + annotation["img_paths"])),
        "isValidation": np.array(annotation["isValidation"]),
        "img_paths": annotation["img_paths"],
        "img_width": np.array(annotation["img_width"]),
        "img_height": np.array(annotation["img_height"]),
        "objpos": np.array(annotation["objpos"]),
        "joint_self": np.array(annotation["joint_self"]),
        "scale_provided": np.array(annotation["scale_provided"]),
        "annolist_index": np.array(annotation["annolist_index"]),
        "people_index": np.array(annotation["people_index"]),
        "numOtherPeople": np.array(annotation["numOtherPeople"]),
    }


if __name__ == "__main__":
    tag = input("Enter tag (username/dataset_name): ")
    jsonfile = input("Enter json file path: ")
    img_path = input("Enter path to images: ")

    annotations = get_anno(jsonfile)

    t1 = time.time()
    ds = mpii_transform(annotations)
    ds = ds.store(tag)
    print("Time taken to upload:", (time.time() - t1), "sec")

"""
Dataset uploaded using AWS EC2. The pipeline took 8931.26 sec to finish.
The dataset is visible on the app and tested working.
"""
@@ -0,0 +1,2 @@
# A Gentle Introduction to Hub
A collection of tutorials for [Hub](https://github.com/activeloopai/hub). It starts off by working with [different types](https://docs.activeloop.ai/en/latest/concepts/features.html#available-schemas) of data (e.g. images and audio), and then moves on to more advanced concepts like dynamic tensors.
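As a taste of what the tutorials cover, here is a minimal sketch of defining a schema with an image field and a dynamically shaped tensor, then writing a few samples. It assumes the Hub 1.x API used elsewhere in this commit; the path ./data/intro and the field names are only illustrative:

import numpy as np
import hub
from hub import schema
from hub.schema import Tensor

# Schema with a variable-size image and a variable-length float tensor.
my_schema = {
    "image": schema.Image(shape=(None, None, 3), max_shape=(512, 512, 3), dtype="uint8"),
    "embedding": Tensor(shape=(None,), max_shape=(256,), dtype="float32"),  # dynamic length
}

# Create a small dataset, fill one sample, and persist it.
ds = hub.Dataset("./data/intro", shape=(4,), schema=my_schema, mode="w+")
ds["image", 0] = np.zeros((32, 32, 3), dtype="uint8")
ds["embedding", 0] = np.random.rand(128).astype("float32")
ds.flush()  # write pending chunks to storage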