Skip to content

Commit 2e6a38e

Browse files
committed
Added DatasetVariationIndex
1 parent 3d7fedb commit 2e6a38e

File tree

4 files changed

+54
-47
lines changed

4 files changed

+54
-47
lines changed

order/adapters/das.py

+29-15
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# coding: utf-8
22

33
from __future__ import annotations
4-
4+
import requests
5+
from order.settings import Settings
56

67
__all__ = ["DASDatasetAdapter"]
78

@@ -13,17 +14,30 @@ class DASDatasetAdapter(Adapter):
1314

1415
name = "das_dataset"
1516

16-
def retrieve_data(self, *, keys: list[str]) -> Materialized:
17-
if keys[0].startswith("/SCALE"):
18-
return Materialized(n_events=1, n_files=1)
19-
return Materialized(n_events=5_000_000, n_files=12)
20-
21-
22-
class DASLFNsAdapter(Adapter):
23-
24-
name = "das_lfns"
25-
26-
def retrieve_data(self, *, keys: list[str]) -> Materialized:
27-
if keys[0].startswith("/SCALE"):
28-
return Materialized(lfns=["/SCALE/b/NANOAODSIM"])
29-
return Materialized(lfns=["/a/b/NANOAODSIM"])
17+
def retrieve_data(self, *, keys: list[str], dbs_instance: str = "prod/global") -> Materialized:
    """
    Query the DBS reader service for the files of all dataset *keys* and aggregate them.

    One request is sent per distinct key to the ``files`` endpoint of the DBS reader of
    *dbs_instance*, authenticated with the user proxy taken from the settings. The file
    records of all keys are merged into a single result.

    :param keys: Dataset keys to look up. A list is accepted because one dataset may be spread
        over several keys (presumably statistics extensions).
    :param dbs_instance: DBS instance that is inserted into the request URL.
    :return: A :py:class:`Materialized` object with the fields ``n_files``, ``n_events``,
        ``lfns`` and ``file_size``, each accumulated over the files of all keys.
    :raises requests.HTTPError: If the service answers with an error status code.
    :raises ValueError: If a response body is not a list of file records.
    """
    # (connect, read) timeouts in seconds, without them a stalled connection blocks forever
    timeout = (10, 300)

    # results are stored per key, so a key that is passed twice is requested and counted once
    results = {}
    for key in keys:
        if key in results:
            continue

        resource = f"https://cmsweb.cern.ch:8443/dbs/{dbs_instance}/DBSReader/files?dataset={key}&detail=True"  # noqa
        # NOTE(review): verify=False disables TLS certificate verification of the server;
        # pointing "verify" to a CA bundle that contains the CERN CAs would be safer
        response = requests.get(
            resource,
            cert=Settings.instance().user_proxy,
            verify=False,
            timeout=timeout,
        )
        # fail here on HTTP errors rather than on missing fields in an error payload below
        response.raise_for_status()

        payload = response.json()
        if not isinstance(payload, list):
            raise ValueError(
                f"unexpected DBS response for dataset '{key}': expected a list of file "
                f"records, got {type(payload).__name__}",
            )
        results[key] = payload

    # flatten the file records of all keys, then aggregate them
    files = [record for payload in results.values() for record in payload]

    return Materialized(
        n_files=len(files),
        n_events=sum(record["event_count"] for record in files),
        lfns=[record["logical_file_name"] for record in files],
        file_size=sum(record["file_size"] for record in files),
    )
42+
43+

order/adapters/dbs.py

-26
This file was deleted.

order/models/dataset.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@
33
from __future__ import annotations
44

55

6-
__all__ = ["DatasetIndex", "Dataset", "LazyDataset", "DatasetVariation", "GenOrder"]
6+
__all__ = ["DatasetIndex", "Dataset", "LazyDataset", "DatasetVariation","DatasetVariationIndex", "GenOrder"]
77

88

99
import enum
1010

1111
from pydantic import Field, field_validator
1212

1313
from order.types import (
14-
Union, List, Dict, NonEmptyStrictStr, PositiveStrictInt, Lazy, ClassVar, Any,
14+
Union, List, Dict, NonEmptyStrictStr, PositiveStrictInt,
15+
PositiveStrictFloat, Lazy, ClassVar, Any,
1516
)
1617
# from order.util import validated
1718
from order.models.base import Model, AdapterModel
@@ -23,7 +24,13 @@ class DatasetIndex(UniqueObjectIndex):
2324
class_name: NonEmptyStrictStr = Field(default="Dataset", frozen=True)
2425
objects: Lazy[List[Union["LazyDataset", "Dataset"]]] = Field(default_factory=list, repr=False)
2526

27+
class DatasetVariationIndex(UniqueObjectIndex):
2628

29+
class_name: NonEmptyStrictStr = Field(default="DatasetVariation", frozen=True)
30+
# NOTE: objects are already typed as Lazy so that dataset variations could be read from somewhere else later on (TODO: confirm intent)
31+
objects: Lazy[List[Union["LazyDataset", "DatasetVariation"]]] = Field(default_factory=list, repr=False)
32+
33+
2734
class LazyDataset(LazyUniqueObject):
2835

2936
class_name: NonEmptyStrictStr = Field(default="Dataset", frozen=True)
@@ -65,14 +72,17 @@ def __str__(self) -> str:
6572
return self.value
6673

6774

68-
class DatasetVariation(Model):
69-
75+
class DatasetVariation(UniqueObject):
76+
7077
keys: List[NonEmptyStrictStr] = Field(frozen=True)
7178
gen_order: NonEmptyStrictStr = Field(default=str(GenOrder.unknown))
7279
n_files: Lazy[PositiveStrictInt]
7380
n_events: Lazy[PositiveStrictInt]
81+
file_size: Lazy[PositiveStrictInt]
7482
lfns: Lazy[List[NonEmptyStrictStr]]
7583

84+
lazy_cls: ClassVar[UniqueObjectBase] = LazyDataset
85+
7686
@field_validator("gen_order", mode="after")
7787
@classmethod
7888
def validate_gen_order(cls, gen_order: str) -> str:
@@ -85,7 +95,7 @@ def validate_gen_order(cls, gen_order: str) -> str:
8595
class Dataset(UniqueObject):
8696

8797
campaign: Lazy["Campaign"]
88-
variations: Dict[str, DatasetVariation] = Field(frozen=True)
98+
variations: DatasetVariationIndex = Field(default_factory=DatasetVariationIndex, frozen=True)
8999

90100
lazy_cls: ClassVar[UniqueObjectBase] = LazyDataset
91101

@@ -127,10 +137,17 @@ def n_events(self) -> int:
127137
def lfns(self) -> list[NonEmptyStrictStr]:
128138
return self.variations["nominal"].lfns
129139

140+
@property
141+
def file_size(self) -> int:
142+
return self.variations["nominal"].file_size
143+
144+
130145

131146
# trailing imports
132147
from order.models.campaign import Campaign
133148

134149
# rebuild models that contained forward type declarations
135150
DatasetIndex.model_rebuild()
151+
DatasetVariation.model_rebuild()
152+
DatasetVariationIndex.model_rebuild()
136153
Dataset.model_rebuild()

order/types.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,10 @@
3535
from pydantic.fields import FieldInfo # noqa
3636

3737

38-
#: Strict positive integer.
38+
#: Strict non-negative integer (the ``Ge(0)`` constraint also admits zero).
3939
PositiveStrictInt = Annotated[StrictInt, Ge(0)]
40+
#: Strict non-negative float (the ``Ge(0)`` constraint also admits zero).
41+
PositiveStrictFloat = Annotated[StrictFloat, Ge(0)]
4042

4143
#: Strict non-empty string.
4244
NonEmptyStrictStr = Annotated[StrictStr, Len(min_length=1)]

0 commit comments

Comments
 (0)