Skip to content

Commit 44b7934

Browse files
authored
Version 6.1.0 (#175)
1 parent 149148e commit 44b7934

File tree

80 files changed

+1653
-925
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

80 files changed

+1653
-925
lines changed

Pipfile.lock

+472-433
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
6.0.0
1+
6.1.0

exabel_data_sdk/client/api/bulk_insert.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
)
1313
from exabel_data_sdk.services.csv_loading_constants import (
1414
DEFAULT_ABORT_THRESHOLD,
15+
DEFAULT_MAX_BACKOFF_SECONDS,
16+
DEFAULT_MIN_BACKOFF_SECONDS,
1517
DEFAULT_NUMBER_OF_RETRIES,
1618
DEFAULT_NUMBER_OF_THREADS,
1719
)
@@ -105,7 +107,11 @@ def _bulk_insert(
105107
raise BulkInsertFailedError()
106108

107109

108-
def _get_backoff(
    trial: int,
    min_sleep: float = DEFAULT_MIN_BACKOFF_SECONDS,
    max_sleep: float = DEFAULT_MAX_BACKOFF_SECONDS,
) -> float:
    """Return the backoff in seconds for the given trial."""
    # Exponential backoff: the sleep doubles on every retry, capped at max_sleep.
    uncapped = min_sleep * 2**trial
    return uncapped if uncapped <= max_sleep else max_sleep
111117

exabel_data_sdk/client/api/time_series_api.py

+30
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ def create_time_series(
170170
series: Union[pd.Series, TimeSeries],
171171
create_tag: Optional[bool] = None, # pylint: disable=unused-argument
172172
default_known_time: Optional[DefaultKnownTime] = None,
173+
should_optimise: Optional[bool] = None,
173174
) -> None:
174175
"""
175176
Create a time series.
@@ -193,13 +194,17 @@ def create_time_series(
193194
the Known Time for data points where a specific known time timestamp
194195
has not been given. If not provided, the Exabel API defaults to the
195196
current time (upload time) as the Known Time.
197+
should_optimise:
198+
Whether time series storage optimisation should be enabled or not. If not
199+
set, optimisation is at the discretion of the server.
196200
"""
197201
series = self._handle_time_series(name, series)
198202

199203
self.client.create_time_series(
200204
CreateTimeSeriesRequest(
201205
time_series=series.to_proto(),
202206
default_known_time=default_known_time,
207+
insert_options=InsertOptions(should_optimise=should_optimise),
203208
),
204209
)
205210

@@ -210,6 +215,7 @@ def upsert_time_series(
210215
series: pd.Series,
211216
create_tag: Optional[bool] = None, # pylint: disable=unused-argument
212217
default_known_time: Optional[DefaultKnownTime] = None,
218+
should_optimise: Optional[bool] = None,
213219
) -> None:
214220
"""
215221
Create or update a time series.
@@ -229,12 +235,16 @@ def upsert_time_series(
229235
the Known Time for data points where a specific known time timestamp
230236
has not been given. If not provided, the Exabel API defaults to the
231237
current time (upload time) as the Known Time.
238+
should_optimise:
239+
Whether time series storage optimisation should be enabled or not. If not
240+
set, optimisation is at the discretion of the server.
232241
"""
233242
self.append_time_series_data(
234243
name,
235244
series,
236245
default_known_time,
237246
allow_missing=True,
247+
should_optimise=should_optimise,
238248
)
239249

240250
@deprecate_arguments(create_tag=None)
@@ -245,6 +255,7 @@ def append_time_series_data(
245255
default_known_time: Optional[DefaultKnownTime] = None,
246256
allow_missing: bool = False,
247257
create_tag: bool = False, # pylint: disable=unused-argument
258+
should_optimise: Optional[bool] = None,
248259
) -> None:
249260
"""
250261
Append data to the given time series.
@@ -263,6 +274,9 @@ def append_time_series_data(
263274
allow_missing: If set to true, and the resource is not found, a new resource will be
264275
created. In this situation, the "update_mask" is ignored.
265276
create_tag: Deprecated.
277+
should_optimise:
278+
Whether time series storage optimisation should be enabled or not. If not
279+
set, optimisation is at the discretion of the server.
266280
"""
267281
series = self._handle_time_series(name, series)
268282

@@ -271,6 +285,7 @@ def append_time_series_data(
271285
time_series=series.to_proto(),
272286
insert_options=InsertOptions(
273287
default_known_time=default_known_time,
288+
should_optimise=should_optimise,
274289
),
275290
update_options=UpdateOptions(
276291
allow_missing=allow_missing,
@@ -289,6 +304,7 @@ def import_time_series(
289304
status_in_response: bool = False,
290305
replace_existing_time_series: bool = False,
291306
replace_existing_data_points: bool = False,
307+
should_optimise: Optional[bool] = None,
292308
) -> Optional[Sequence[ResourceCreationResult]]:
293309
"""
294310
Import multiple time series.
@@ -325,6 +341,9 @@ def import_time_series(
325341
inserted time series points. Data points at times not present in the
326342
request will be left untouched. Only one of replace_existing_data_points
327343
or replace_existing_time_series can be set to true.
344+
should_optimise:
345+
Whether time series storage optimisation should be enabled or not. If
346+
not set, optimisation is at the discretion of the server.
328347
Returns:
329348
If status_in_response is set to true, a list of ResourceCreationResult will be returned.
330349
Otherwise, None is returned.
@@ -348,6 +367,7 @@ def import_time_series(
348367
status_in_response=status_in_response,
349368
insert_options=InsertOptions(
350369
default_known_time=default_known_time,
370+
should_optimise=should_optimise,
351371
),
352372
update_options=update_options,
353373
)
@@ -367,6 +387,7 @@ def append_time_series_data_and_return(
367387
allow_missing: Optional[bool] = False,
368388
create_tag: Optional[bool] = None, # pylint: disable=unused-argument
369389
include_metadata: Optional[bool] = False,
390+
should_optimise: Optional[bool] = None,
370391
) -> Union[pd.Series, TimeSeries]:
371392
"""
372393
Append data to the given time series, and return the full series.
@@ -388,6 +409,9 @@ def append_time_series_data_and_return(
388409
include_metadata:
389410
Whether to include the metadata of the time series in the response.
390411
Returns a TimeSeries object if set to True, otherwise a pandas Series.
412+
should_optimise:
413+
Whether time series storage optimisation should be enabled or not. If not
414+
set, optimisation is at the discretion of the server.
391415
392416
Returns:
393417
A series with all data for the given time series.
@@ -401,6 +425,7 @@ def append_time_series_data_and_return(
401425
view=TimeSeriesView(time_range=TimeRange()),
402426
insert_options=InsertOptions(
403427
default_known_time=default_known_time,
428+
should_optimise=should_optimise,
404429
),
405430
update_options=UpdateOptions(
406431
allow_missing=allow_missing,
@@ -441,6 +466,7 @@ def bulk_upsert_time_series(
441466
default_known_time: Optional[DefaultKnownTime] = None,
442467
replace_existing_time_series: bool = False,
443468
replace_existing_data_points: bool = False,
469+
should_optimise: Optional[bool] = None,
444470
retries: int = DEFAULT_NUMBER_OF_RETRIES,
445471
abort_threshold: Optional[float] = DEFAULT_ABORT_THRESHOLD,
446472
# Deprecated arguments
@@ -477,6 +503,9 @@ def bulk_upsert_time_series(
477503
inserted time series points. Data points at times not present in the
478504
request will be left untouched. Only one of replace_existing_data_points
479505
or replace_existing_time_series can be set to true.
506+
should_optimise:
507+
Whether time series storage optimisation should be enabled or not. If
508+
not set, optimisation is at the discretion of the server.
480509
retries: Maximum number of retries to make for each failed request.
481510
abort_threshold:
482511
The threshold for the proportion of failed requests that will cause the
@@ -497,6 +526,7 @@ def import_func(
497526
status_in_response=True,
498527
replace_existing_time_series=replace_existing_time_series,
499528
replace_existing_data_points=replace_existing_data_points,
529+
should_optimise=should_optimise,
500530
)
501531
assert result is not None
502532
return result
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import argparse
2+
import sys
3+
from typing import Sequence
4+
5+
import pandas as pd
6+
7+
from exabel_data_sdk import ExabelClient
8+
from exabel_data_sdk.scripts.list_time_series import ListTimeSeries
9+
from exabel_data_sdk.services.csv_loading_constants import (
10+
DEFAULT_NUMBER_OF_RETRIES,
11+
DEFAULT_NUMBER_OF_THREADS,
12+
)
13+
14+
15+
class DeleteTimeSeriesPoints(ListTimeSeries):
    """
    Deletes all time series data points for a specific date and known time.

    The script fetches all time series for a given signal, entity type or
    entity, or a combination of these.
    """

    def __init__(self, argv: Sequence[str], description: str):
        super().__init__(argv, description)
        # The data point to delete is identified by its date and, optionally,
        # its known time (point-in-time dimension).
        self.parser.add_argument(
            "--date",
            required=True,
            type=str,
            help="The date of the time series point to delete.",
        )
        self.parser.add_argument(
            "--known-time",
            required=False,
            type=str,
            help="The known time of the time series point to delete.",
        )
        self.parser.add_argument(
            "--dry-run",
            required=False,
            action="store_true",
            default=False,
            help="Only print to console instead of deleting",
        )
        self.parser.add_argument(
            "--threads",
            required=False,
            type=int,
            choices=range(1, 101),
            metavar="[1-100]",
            default=DEFAULT_NUMBER_OF_THREADS,
            help=f"The number of parallel upload threads to run. "
            f"Defaults to {DEFAULT_NUMBER_OF_THREADS}.",
        )
        self.parser.add_argument(
            "--retries",
            required=False,
            type=int,
            choices=range(1, 51),
            metavar="[1-50]",
            default=DEFAULT_NUMBER_OF_RETRIES,
            help=f"The maximum number of retries to make for each failed request. Defaults to "
            f"{DEFAULT_NUMBER_OF_RETRIES}.",
        )

    def run_script(self, client: ExabelClient, args: argparse.Namespace) -> None:
        """
        List all time series matching the given filters and delete the data
        point at the requested date (and known time) from each of them.
        """
        all_time_series = self._list_time_series(
            client,
            entity=args.entity,
            signal=args.signal,
            entity_type=args.entity_type,
            show_progress=args.show_progress,
        )

        if not all_time_series:
            print("Did not find any time series.")
            return

        num_time_series = len(all_time_series)
        # One data point is deleted per time series, so the counts coincide.
        print(f"Number of time series data points to delete: {num_time_series}")

        date = pd.Timestamp(args.date)
        known_time = pd.Timestamp(args.known_time) if args.known_time else None

        # Explicit None check: the presence of a known time decides whether the
        # index must carry the point-in-time dimension.
        index = (
            pd.MultiIndex.from_tuples([(date, known_time)], names=["date", "known_time"])
            if known_time is not None
            else pd.Index([date], name="date")
        )

        # The series value is a placeholder; presumably only the index (date /
        # known time) identifies the points to delete — confirm against
        # batch_delete_time_series_points.
        series = [pd.Series([1], index=index, name=time_series) for time_series in all_time_series]

        print(f"Deleting time series data points with date {args.date}", end=" ")
        if known_time is not None:
            print(f"and known time {args.known_time}", end=" ")
        print(f"from the following {num_time_series} time series.")

        for ts in series:
            print(ts.name)

        if args.dry_run:
            print(f"Would have deleted {num_time_series} time series data points.")
            return

        result = client.time_series_api.batch_delete_time_series_points(
            series, args.threads, args.retries
        )

        print(f"Successfully deleted {result.total_count} time series data points.")
        if result.has_failure():
            print(f"Failed to delete {len(result.get_failures())} time series data points.")
110+
111+
112+
if __name__ == "__main__":
    # Script entry point: build the CLI description and run the deleter.
    _description = (
        "Deletes all time series data points for a given date and known time. "
        "A signal, entity type, or entity, or a combination of these, can be specified "
        "to filter the time series to delete."
    )
    DeleteTimeSeriesPoints(sys.argv, _description).run()

exabel_data_sdk/scripts/load_time_series_from_file.py

+18
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,21 @@ def __init__(self, argv: Sequence[str]):
137137
default=False,
138138
help="Replace any existing data points on the specified dates when importing",
139139
)
140+
group = self.parser.add_mutually_exclusive_group()
141+
group.add_argument(
142+
"--optimise",
143+
required=False,
144+
action="store_true",
145+
help="Enable time series storage optimisation. If neither this nor --no-optimise is "
146+
"set, optimisation is at the discretion of the server.",
147+
)
148+
group.add_argument(
149+
"--no-optimise",
150+
required=False,
151+
action="store_true",
152+
help="Disable time series storage optimisation. If neither this nor --optimise is set, "
153+
"optimisation is at the discretion of the server.",
154+
)
140155

141156
def run_script(self, client: ExabelClient, args: argparse.Namespace) -> None:
142157
try:
@@ -159,6 +174,9 @@ def run_script(self, client: ExabelClient, args: argparse.Namespace) -> None:
159174
abort_threshold=args.abort_threshold,
160175
replace_existing_time_series=args.replace_existing_time_series,
161176
replace_existing_data_points=args.replace_existing_data_points,
177+
should_optimise=(
178+
True if args.optimise is True else False if args.no_optimise is True else None
179+
),
162180
)
163181
except FileLoadingException as e:
164182
print("ERROR: Loading time series failed.")

exabel_data_sdk/services/csv_loading_constants.py

+2
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@
55
DEFAULT_NUMBER_OF_RETRIES = 5
66
MAX_THREADS_FOR_IMPORT = 100
77
FAILURE_LOG_LIMIT = None # type: ignore[var-annotated]
8+
DEFAULT_MIN_BACKOFF_SECONDS = 1
9+
DEFAULT_MAX_BACKOFF_SECONDS = 60 * 10

exabel_data_sdk/services/file_time_series_loader.py

+7
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ def load_time_series(
7878
case_sensitive_signals: bool = False,
7979
replace_existing_time_series: bool = False,
8080
replace_existing_data_points: bool = False,
81+
should_optimise: Optional[bool] = None,
8182
return_results: bool = True,
8283
processed_rows: int = 0,
8384
total_rows: Optional[int] = None,
@@ -122,6 +123,8 @@ def load_time_series(
122123
case_sensitive_signals: if True, signals are case sensitive
123124
replace_existing_time_series: if True, any existing time series are replaced
124125
replace_existing_data_points: if True, any existing time series data points are replaced
126+
should_optimise: Whether time series storage optimisation should be enabled or not. If
127+
not set, optimisation is at the discretion of the server.
125128
return_results: if True, returns a list of TimeSeriesFileLoadingResults
126129
or otherwise an empty list.
127130
processed_rows: the number of rows already processed
@@ -169,6 +172,7 @@ def load_time_series(
169172
replace_existing_time_series=replace_existing_time_series,
170173
replace_existing_data_points=replace_existing_data_points,
171174
replaced_time_series=replaced_time_series,
175+
should_optimise=should_optimise,
172176
)
173177
if result.processed_rows is not None and total_rows:
174178
processed_rows = processed_rows + result.processed_rows
@@ -206,6 +210,7 @@ def _load_time_series(
206210
replace_existing_time_series: bool = False,
207211
replace_existing_data_points: bool = False,
208212
replaced_time_series: Optional[Sequence[str]] = None,
213+
should_optimise: Optional[bool] = None,
209214
) -> TimeSeriesFileLoadingResult:
210215
"""
211216
Load time series from a parser.
@@ -371,6 +376,7 @@ def _load_time_series(
371376
retries=retries,
372377
abort_threshold=abort_threshold,
373378
replace_existing_time_series=True,
379+
should_optimise=should_optimise,
374380
)
375381
if error_on_any_failure and (replace_result.has_failure() or invalid_series):
376382
raise FileLoadingException(
@@ -387,6 +393,7 @@ def _load_time_series(
387393
retries=retries,
388394
abort_threshold=abort_threshold,
389395
replace_existing_data_points=replace_existing_data_points,
396+
should_optimise=should_optimise,
390397
)
391398
if error_on_any_failure and (result.has_failure() or invalid_series):
392399
raise FileLoadingException(

0 commit comments

Comments
 (0)