Skip to content

Commit

Permalink
Merge pull request specklesystems#17 from specklesystems/izzy/chunking
Browse files Browse the repository at this point in the history
🍰 Chunking of long lists
  • Loading branch information
izzylys authored Dec 24, 2020
2 parents 78068d0 + 028ca64 commit 3e7b620
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 20 deletions.
16 changes: 14 additions & 2 deletions speckle/api/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def receive(
# try local transport first. if the parent is there, we assume all the children are there and continue with deserialisation using the local transport
obj_string = local_transport.get_object(obj_id)
if obj_string:
base = serializer.read_json(id=obj_id, obj_string=obj_string)
base = serializer.read_json(obj_string=obj_string)
return base

if not remote_transport:
Expand All @@ -78,4 +78,16 @@ def receive(
id=obj_id, target_transport=local_transport
)

return serializer.read_json(id=obj_id, obj_string=obj_string)
return serializer.read_json(obj_string=obj_string)


def serialize(base: Base) -> str:
    """Serialise a Base object and return its string representation.

    A fresh ``BaseObjectSerializer`` is created for every call; the second
    element of the value returned by ``write_json`` is handed back.
    """
    written = BaseObjectSerializer().write_json(base)
    return written[1]


def deserialize(obj_string: str) -> Base:
    """Rebuild a Base object from its string representation.

    A fresh ``BaseObjectSerializer`` is created for every call and its
    ``read_json`` result is returned unchanged.
    """
    return BaseObjectSerializer().read_json(obj_string=obj_string)
14 changes: 14 additions & 0 deletions speckle/objects/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from pathlib import Path
import sys
import inspect
import pkgutil
from importlib import import_module
from .base import Base


# Import every module that sits next to this file and re-export each class
# derived from ``Base`` as an attribute of this package, so that a class can be
# looked up on the package by its name.
#
# ``pkgutil.iter_modules`` is documented to take a list of path *strings*.
# Handing it ``pathlib.Path`` objects silently yields nothing on some CPython
# releases (3.8.10 / 3.9.5), which would leave the package without any
# registered classes, so the directory is converted to ``str`` first.
for _, name, _ in pkgutil.iter_modules([str(Path(__file__).parent)]):
    imported_module = import_module("." + name, package=__name__)
    classes = inspect.getmembers(imported_module, inspect.isclass)
    for class_name, cls in classes:
        if issubclass(cls, Base):
            setattr(sys.modules[__name__], class_name, cls)
33 changes: 28 additions & 5 deletions speckle/objects/base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import annotations
from speckle.logging.exceptions import SpeckleException

from typing import Dict, List, Optional, Any

from pydantic import BaseModel
from pydantic.main import Extra
from typing import Dict, List, Optional, Any
from speckle.transports.memory import MemoryTransport
from speckle.logging.exceptions import SpeckleException


PRIMITIVES = (int, float, str, bool)

Expand All @@ -14,13 +15,18 @@ class Base(BaseModel):
totalChildrenCount: Optional[int] = None
applicationId: Optional[str] = None
speckle_type: Optional[str] = "Base"
_chunkable: Dict[str, int] = {} # dict of chunkable props and their max chunk size

# Build the object with no arguments passed to the parent initialiser, stamp it
# with its concrete class name as `speckle_type`, then copy every keyword
# argument straight into the instance `__dict__`.
# The kwargs are applied last, so a `speckle_type` kwarg replaces the class name.
# NOTE(review): writing to `__dict__` directly presumably skips pydantic's
# field validation for the supplied values — confirm this is intended.
def __init__(self, **kwargs) -> None:
super().__init__()
self.speckle_type = self.__class__.__name__
self.__dict__.update(kwargs)

def __repr__(self) -> str:
    """Return a one-line summary: class name, id, speckle_type and child count."""
    class_name = self.__class__.__name__
    summary = (
        f"id: {self.id}, "
        f"speckle_type: {self.speckle_type}, "
        f"totalChildrenCount: {self.totalChildrenCount}"
    )
    return f"{class_name}({summary})"

def __str__(self) -> str:
return f"Base(id: {self.id}, speckle_type: {self.speckle_type}, totalChildrenCount: {self.totalChildrenCount})"
return self.__repr__()

def __setitem__(self, name: str, value: Any) -> None:
self.__dict__[name] = value
Expand Down Expand Up @@ -74,6 +80,19 @@ def get_children_count(self) -> int:
parsed = []
return 1 + self._count_descendants(self, parsed)

def get_id(self, decompose: bool = False) -> str:
    """Return the id (hash) of this object.

    Arguments:
        decompose {bool} -- when False and an id is already set, that id is
        returned as is. Otherwise the object is run through a fresh
        serializer and the first element of the ``traverse_base`` result is
        returned. When True, the serializer is additionally given an
        in-memory write transport before traversing.

    Returns:
        str -- the stored or freshly computed id
    """
    if self.id and not decompose:
        return self.id

    # imported here rather than at module level
    # NOTE(review): presumably to avoid a circular import with the serializer
    # module, which itself imports from this module -- confirm
    from speckle.serialization.base_object_serializer import (
        BaseObjectSerializer,
    )

    serializer = BaseObjectSerializer()
    if decompose:
        serializer.write_transports = [MemoryTransport()]
    traversed = serializer.traverse_base(self)
    return traversed[0]

def _count_descendants(self, base: Base, parsed: List) -> int:
if base in parsed:
return 0
Expand Down Expand Up @@ -114,4 +133,8 @@ def _handle_object_count(self, obj: Any, parsed: List) -> int:
return count

class Config:
extra = Extra.allow
extra = Extra.allow


# Holds one slice of a list property that the serializer has split into chunks:
# items are appended to `data` while chunking, and the `data` lists of all
# chunks are concatenated again when a chunked list is read back.
class DataChunk(Base):
data: List[Any] = []
24 changes: 24 additions & 0 deletions speckle/objects/mesh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from __future__ import annotations

from typing import List, Optional

from pydantic import BaseModel
from .base import Base

# Mesh properties that are chunked on serialisation, mapped to the maximum
# number of items placed in a single chunk.
CHUNKABLE_PROPS = {
"vertices": 2000,
"faces": 2000,
"colors": 2000,
"textureCoordinates": 2000,
}


class Mesh(Base):
    """Mesh object whose long list properties are chunked when serialised.

    The four list properties default to ``None``. The names and maximum chunk
    sizes of the chunkable properties come from ``CHUNKABLE_PROPS``.
    """

    vertices: Optional[List[float]] = None
    faces: Optional[List[int]] = None
    colors: Optional[List[int]] = None
    textureCoordinates: Optional[List[float]] = None

    # Mesh gets its own chunking table instead of updating the inherited one in
    # ``__init__``. ``_chunkable`` is a single dict defined on ``Base``, so an
    # in-place ``update`` from an instance would change it for every other
    # ``Base`` subclass as well. Entries already on ``Base`` are kept.
    _chunkable = {**getattr(Base, "_chunkable", {}), **CHUNKABLE_PROPS}

    def __init__(self, **kwargs) -> None:
        """Create a mesh; keyword arguments are passed on to ``Base``."""
        super().__init__(**kwargs)
71 changes: 59 additions & 12 deletions speckle/serialization/base_object_serializer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import hashlib

from speckle import objects
from uuid import uuid4
from typing import Any, Dict, List, Tuple
from speckle.objects.base import Base
from speckle.objects.base import Base, DataChunk
from speckle.logging.exceptions import SerializationException, SpeckleException
from speckle.transports.abstract_transport import AbstractTransport

Expand Down Expand Up @@ -54,11 +55,16 @@ def traverse_base(self, base: Base) -> Tuple[str, Dict]:
prop = props.pop(0)
value = obj[prop]

# skip nulls or props marked to be ignored with "__"
if not value or prop.startswith("__"):
# skip nulls or props marked to be ignored with "__" or "_"
if not value or prop.startswith(("__", "_")):
continue

detach = True if prop.startswith("@") else False
# don't prepopulate id as this will mess up hashing
if prop == "id":
continue

chunkable = True if prop in base._chunkable else False
detach = True if prop.startswith("@") or chunkable else False

# 1. handle primitives (ints, floats, strings, and bools)
if isinstance(value, PRIMITIVES):
Expand All @@ -68,13 +74,33 @@ def traverse_base(self, base: Base) -> Tuple[str, Dict]:
# 2. handle Base objects
elif isinstance(value, Base):
child_obj = self.traverse_value(value, detach=detach)
if detach:
if detach and self.write_transports:
ref_hash = child_obj["id"]
object_builder[prop] = self.detach_helper(ref_hash=ref_hash)
else:
object_builder[prop] = child_obj

# 3. handle all other cases
# 3. handle chunkable props
elif chunkable and self.write_transports:
chunks = []
max_size = base._chunkable[prop]
chunk = DataChunk()
for count, item in enumerate(value):
if count and count % max_size == 0:
chunks.append(chunk)
chunk = DataChunk()
chunk.data.append(item)
chunks.append(chunk)

chunk_refs = []
for c in chunks:
self.detach_lineage.append(detach)
ref_hash, _ = self.traverse_base(c)
ref_obj = self.detach_helper(ref_hash=ref_hash)
chunk_refs.append(ref_obj)
object_builder[prop] = chunk_refs

# 4. handle all other cases
else:
child_obj = self.traverse_value(value)
object_builder[prop] = child_obj
Expand All @@ -92,7 +118,7 @@ def traverse_base(self, base: Base) -> Tuple[str, Dict]:
}

# write detached or root objects to transports
if detached:
if detached and self.write_transports:
for t in self.write_transports:
t.save_object(id=hash, serialized_object=json.dumps(object_builder))

Expand Down Expand Up @@ -168,11 +194,10 @@ def __reset_writer(self) -> None:
self.family_tree = {}
self.closure_table = {}

def read_json(self, id: str, obj_string: str) -> Base:
def read_json(self, obj_string: str) -> Base:
"""Recomposes a Base object from the string representation of the object
Arguments:
id {str} -- the hash of the object
obj_string {str} -- the string representation of the object
Returns:
Expand All @@ -199,9 +224,11 @@ def recompose_base(self, obj: dict) -> Base:
return
if isinstance(obj, str):
obj = json.loads(obj)
if obj["speckle_type"] == "reference":
obj = self.get_child(obj=obj)

# initialise the base object
base = Base()
# initialise the base object using `speckle_type`
base = getattr(objects, obj["speckle_type"], Base)()

# get total children count
if "__closure" in obj:
Expand Down Expand Up @@ -247,16 +274,36 @@ def handle_value(self, obj: Any):
if isinstance(obj, PRIMITIVES):
return obj

# lists (regular and chunked)
if isinstance(obj, list):
return [self.handle_value(o) for o in obj]
obj_list = [self.handle_value(o) for o in obj]
# handle chunked lists
if isinstance(obj_list[0], DataChunk):
data = []
for o in obj_list:
data.extend(o["data"])
return data
else:
return obj_list

# bases
if isinstance(obj, dict) and "speckle_type" in obj:
return self.recompose_base(obj=obj)

# dictionaries
if isinstance(obj, dict):
for k, v in obj.items():
if isinstance(v, PRIMITIVES):
continue
else:
obj[k] = self.handle_value(v)
return obj

def get_child(self, obj: Dict):
    """Fetch and parse the object a reference points to.

    Arguments:
        obj {dict} -- a reference object carrying a `referencedId` key

    Returns:
        the JSON-decoded object stored under that id in the read transport

    Raises:
        SpeckleException -- if the read transport returns nothing for the id
    """
    ref_hash = obj["referencedId"]
    serialized_child = self.read_transport.get_object(id=ref_hash)
    if serialized_child:
        return json.loads(serialized_child)

    raise SpeckleException(
        f"Could not find the referenced child object of id `{ref_hash}` in the given read transport: {self.read_transport.name}"
    )
2 changes: 1 addition & 1 deletion speckle/transports/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def save_object_from_transport(

def get_object(self, id: str) -> str or None:
if id in self.objects:
return json.dumps(self.objects[id])
return self.objects[id]
else:
return None

Expand Down

0 comments on commit 3e7b620

Please sign in to comment.