Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store.groupby enhancements #670

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Prev Previous commit
Next Next commit
MemoryStore: inherit parent groupby
  • Loading branch information
rkingsbury committed May 27, 2022
commit 51043a1648ff9f45210e8d2f84e2ec86a8e9f172
47 changes: 0 additions & 47 deletions src/maggma/stores/mongolike.py
Original file line number Diff line number Diff line change
@@ -612,53 +612,6 @@ def __hash__(self):
"""Hash for the store"""
return hash((self.name, self.last_updated_field))

def groupby(
    self,
    keys: Union[List[str], str],
    criteria: Optional[Dict] = None,
    properties: Union[Dict, List, None] = None,
    sort: Optional[Dict[str, Union[Sort, int]]] = None,
    skip: int = 0,
    limit: int = 0,
) -> Iterator[Tuple[Dict, List[Dict]]]:
    """
    Group the documents returned by ``self.query`` by the values of
    one or more fields.

    Documents that lack any of the grouping fields are dropped. The
    remaining documents are sorted by their tuple of grouping values
    and each run of equal tuples is yielded as one group, so groups
    come out in ascending order of those values.

    Args:
        keys: field name, or list of field names, to group documents by
        criteria: PyMongo filter for documents to search in
        properties: properties to return in grouped documents; when a
            dict is given only its keys are used
        sort: Dictionary of sort order for fields. Keys are field names and
            values are 1 for ascending or -1 for descending.
            NOTE: accepted but not used by this implementation.
        skip: number of documents to skip.
            NOTE: accepted but not used by this implementation.
        limit: limit on total number of documents returned.
            NOTE: accepted but not used by this implementation.

    Returns:
        generator returning tuples of (key, list of elements), where key
        is a dict holding the grouping fields and their shared values
    """
    key_fields = keys if isinstance(keys, list) else [keys]

    # Normalise ``properties`` to a list so it can be appended to the keys.
    if properties is None:
        extra_fields = []
    elif isinstance(properties, dict):
        extra_fields = list(properties.keys())
    else:
        extra_fields = properties

    def key_values(doc):
        return tuple(get(doc, field) for field in key_fields)

    # Keep only documents that carry every grouping field.
    matching = []
    for doc in self.query(properties=key_fields + extra_fields, criteria=criteria):
        if all(has(doc, field) for field in key_fields):
            matching.append(doc)

    # itertools.groupby only merges adjacent items, so sort by the same key first.
    matching.sort(key=key_values)

    for values, members in groupby(matching, key=key_values):
        group_id = {}  # type: Dict[Any,Any]
        for field, value in zip(key_fields, values):
            set_(group_id, field, value)
        yield group_id, list(members)

def __eq__(self, other: object) -> bool:
"""
Check equality for MemoryStore