Skip to content

Commit

Permalink
v4.3.0 (#146)
Browse files Browse the repository at this point in the history
  • Loading branch information
blundski authored May 9, 2023
1 parent e975f1f commit 80ca08c
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 11 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4.2.0
4.3.0
11 changes: 11 additions & 0 deletions exabel_data_sdk/scripts/load_time_series_from_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ def __init__(self, argv: Sequence[str]):
default=False,
help="If set, the time series are not validated before uploading.",
)
self.parser.add_argument(
"--case-sensitive-signals",
required=False,
action="store_true",
default=False,
help="If set, signal names are treated case sensitive. Note that this will disable "
"lowercasing of other column headers as well, as entities, 'date', and "
"'known_time'. Take care to maintain correct casing in the file when using this "
"option.",
)

def run_script(self, client: ExabelClient, args: argparse.Namespace) -> None:
try:
Expand All @@ -138,6 +148,7 @@ def run_script(self, client: ExabelClient, args: argparse.Namespace) -> None:
retries=args.retries,
batch_size=args.batch_size,
skip_validation=args.skip_validation,
case_sensitive_signals=args.case_sensitive_signals,
)
except FileLoadingException as e:
print(e)
Expand Down
3 changes: 3 additions & 0 deletions exabel_data_sdk/services/csv_time_series_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def load_time_series(
retries: int = DEFAULT_NUMBER_OF_RETRIES,
abort_threshold: Optional[float] = 0.5,
skip_validation: bool = False,
case_sensitive_signals: bool = False,
# Deprecated arguments
namespace: Optional[str] = None, # pylint: disable=unused-argument
) -> FileLoadingResult:
Expand Down Expand Up @@ -69,6 +70,7 @@ def load_time_series(
abort_threshold: the threshold for the proportion of failed requests that will cause the
upload to be aborted; if it is `None`, the upload is never aborted
skip_validation: if True, the time series are not validated before uploading
case_sensitive_signals: if True, signals are case sensitive
"""
results = FileTimeSeriesLoader(self._client).load_time_series(
filename=filename,
Expand All @@ -86,6 +88,7 @@ def load_time_series(
retries=retries,
abort_threshold=abort_threshold,
skip_validation=skip_validation,
case_sensitive_signals=case_sensitive_signals,
)
if len(results) != 1:
raise ValueError("Unexpected number of results from time series loading.")
Expand Down
23 changes: 16 additions & 7 deletions exabel_data_sdk/services/file_time_series_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def load_time_series(
abort_threshold: Optional[float] = 0.5,
batch_size: Optional[int] = None,
skip_validation: bool = False,
case_sensitive_signals: bool = False,
return_results: bool = True,
# Deprecated arguments
namespace: Optional[str] = None, # pylint: disable=unused-argument
Expand Down Expand Up @@ -104,6 +105,7 @@ def load_time_series(
batch_size: the number of rows to read and upload in each batch; if not specified, the
entire file will be read into memory and uploaded in a single batch
skip_validation: if True, the time series are not validated before uploading
case_sensitive_signals: if True, signals are case sensitive
"""
if batch_size is not None:
logger.info(
Expand Down Expand Up @@ -139,6 +141,7 @@ def load_time_series(
retries=retries,
abort_threshold=abort_threshold,
skip_validation=skip_validation,
case_sensitive_signals=case_sensitive_signals,
)
if return_results:
results.append(result)
Expand All @@ -163,6 +166,7 @@ def _load_time_series(
retries: int = DEFAULT_NUMBER_OF_RETRIES,
abort_threshold: Optional[float] = 0.5,
skip_validation: bool = False,
case_sensitive_signals: bool = False,
) -> TimeSeriesFileLoadingResult:
"""
Load time series from a parser.
Expand Down Expand Up @@ -204,6 +208,7 @@ def _load_time_series(
self._client.namespace,
entity_mapping,
entity_type=identifier_type or entity_type,
case_sensitive_signals=case_sensitive_signals,
)
break
if parsed_file is None:
Expand Down Expand Up @@ -264,22 +269,26 @@ def _load_time_series(
# Signals are reversed because we want to match the first signal returned by the API
# lexicographically.
all_signals = {
signal.name.lower(): signal
signal.name if case_sensitive_signals else signal.name.lower(): signal
for signal in reversed(list(self._client.signal_api.get_signal_iterator()))
}

missing_signals = []
signals_to_rename = {}
for signal in signals:
lowered_signal_name = prefix + signal.lower()
if lowered_signal_name not in all_signals:
missing_signals.append(lowered_signal_name)
if isinstance(parsed_file, SignalNamesInRows) and signal != signal.lower():
signal_name = prefix + signal if case_sensitive_signals else prefix + signal.lower()
if signal_name not in all_signals:
missing_signals.append(signal_name)
if (
not case_sensitive_signals
and isinstance(parsed_file, SignalNamesInRows)
and signal != signal.lower()
):
signals_to_rename[signal] = signal.lower()
else:
signal_match = all_signals[lowered_signal_name]
signal_match = all_signals[signal_name]
signal_name = signal_match.name.split(".")[-1]
if lowered_signal_name != signal_match.name or (
if signal_name != signal_match.name or (
isinstance(parsed_file, SignalNamesInRows) and signal != signal_name
):
signals_to_rename[signal] = signal_name
Expand Down
11 changes: 8 additions & 3 deletions exabel_data_sdk/services/file_time_series_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,10 @@ def sheet_name(self) -> Optional[str]:
return str(self.worksheet) if self.worksheet is not None else None

def parse_file(
self, nrows: Optional[int] = None, header: Optional[Sequence[int]] = None
self,
nrows: Optional[int] = None,
header: Optional[Sequence[int]] = None,
case_sensitive_signals: bool = False,
) -> pd.DataFrame:
"""Parse the file as a Pandas data frame."""
extension = Path(self.filename).suffix.lower()
Expand Down Expand Up @@ -136,7 +139,7 @@ def parse_file(
)
else:
raise FileLoadingException(f"Unknown file extension '{extension}'")
if not df.empty:
if not df.empty and not case_sensitive_signals:
df = df.rename(lambda n: n.lower(), axis="columns", level=0)
return df

Expand Down Expand Up @@ -216,9 +219,10 @@ def from_file(
namespace: str,
entity_mapping: Optional[Mapping[str, Mapping[str, str]]] = None,
entity_type: Optional[str] = None,
case_sensitive_signals: bool = False,
) -> "ParsedTimeSeriesFile":
"""Read a file and construct a parsed file from the contents."""
data = file_parser.parse_file()
data = file_parser.parse_file(case_sensitive_signals=case_sensitive_signals)
return cls.from_data_frame(data, entity_api, namespace, entity_mapping, entity_type)

@classmethod
Expand Down Expand Up @@ -710,6 +714,7 @@ def from_file(
namespace: str,
entity_mapping: Optional[Mapping[str, Mapping[str, str]]] = None,
entity_type: Optional[str] = None,
case_sensitive_signals: bool = False,
) -> "ParsedTimeSeriesFile":
"""Read a file and construct a new parser from the contents of that file."""
if entity_type is not None:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
brand;date;known_time;Signal1
A_brand;2021-01-01;2021-01-01;1
A_brand;2021-01-02;2021-01-02;2
A_brand;2021-01-03;2021-01-03;3
A_brand;2021-01-04;2021-01-04;4
A_brand;2021-01-05;2021-01-05;5
153 changes: 153 additions & 0 deletions exabel_data_sdk/tests/scripts/test_load_time_series_from_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,156 @@ def test_load_time_series_with_uppercase_signals_existing_and_uppercase_entity_t
check_freq=False,
)

def test_load_time_series_with_uppercase_signals_not_existing_case_sensitive(self):
    """With --case-sensitive-signals and no existing signals, the mixed-case
    signal name from the file is created verbatim and the series is uploaded
    under the mixed-case signal resource name."""
    script = LoadTimeSeriesFromFile(
        common_args
        + [
            "--filename",
            "./exabel_data_sdk/tests/resources/data/timeseries_with_mixedcase_columns.csv",
            "--create-missing-signals",
            "--case-sensitive-signals",
        ]
    )
    # No signals exist on the server: lookups return nothing and the iterator is empty.
    self.client.signal_api.get_signal.return_value = None
    self.client.signal_api.get_signal_iterator.side_effect = lambda *_: PagingResult([], "", 0)
    self.client.entity_api.get_entity_type_iterator.side_effect = (
        self._list_entity_types_uppercase
    )

    script.run_script(self.client, script.parse_arguments())

    upsert_calls = self.client.time_series_api.bulk_upsert_time_series.call_args_list
    self.assertEqual(1, len(upsert_calls))
    uploaded_series = upsert_calls[0][0][0]
    self.assertEqual(1, len(uploaded_series))
    create_calls = self.client.signal_api.create_signal.call_args_list
    self.assertEqual(1, len(create_calls))
    # The signal must be created with its original mixed-case name.
    self.assertEqual("signals/ns.Signal1", create_calls[0][0][0].name)

    dates = pd.date_range("2021-01-01", periods=5, tz=tz.tzutc())
    expected = pd.Series(
        [1, 2, 3, 4, 5],
        pd.MultiIndex.from_arrays([dates, dates]),
        name="entityTypes/BRAND/entities/ns.A_brand/signals/ns.Signal1",
    )
    pd.testing.assert_series_equal(expected, uploaded_series[0], check_freq=False)

def test_load_time_series_with_uppercase_signals_and_lower_case_existing_case_sensitive(self):
    """With --case-sensitive-signals, an existing lower-case signal must NOT match
    the mixed-case "Signal1" column, so the mixed-case signal is created.

    Fix: the ``get_signal_iterator`` side effect was previously overwritten on the
    next line with an empty iterator, so the "lower case existing" precondition in
    the test name was never actually exercised. The redundant override is removed,
    letting ``_list_signal`` supply the existing lower-case signal.
    """
    args = common_args + [
        "--filename",
        "./exabel_data_sdk/tests/resources/data/timeseries_with_mixedcase_columns.csv",
        "--create-missing-signals",
        "--case-sensitive-signals",
    ]
    script = LoadTimeSeriesFromFile(args)
    # The lower-case signal exists on the server, but must not match case sensitively.
    self.client.signal_api.get_signal_iterator.side_effect = self._list_signal
    self.client.signal_api.get_signal.return_value = None
    self.client.entity_api.get_entity_type_iterator.side_effect = (
        self._list_entity_types_uppercase
    )

    script.run_script(self.client, script.parse_arguments())

    call_args_list = self.client.time_series_api.bulk_upsert_time_series.call_args_list
    self.assertEqual(1, len(call_args_list))
    series = call_args_list[0][0][0]
    self.assertEqual(1, len(series))
    call_args_list_create_signal = self.client.signal_api.create_signal.call_args_list
    self.assertEqual(1, len(call_args_list_create_signal))
    signal = call_args_list_create_signal[0][0][0]
    # The mixed-case signal is created because the lower-case one did not match.
    self.assertEqual("signals/ns.Signal1", signal.name)

    pd.testing.assert_series_equal(
        pd.Series(
            [1, 2, 3, 4, 5],
            pd.MultiIndex.from_arrays(
                [
                    pd.date_range("2021-01-01", periods=5, tz=tz.tzutc()),
                    pd.date_range("2021-01-01", periods=5, tz=tz.tzutc()),
                ]
            ),
            name="entityTypes/BRAND/entities/ns.A_brand/signals/ns.Signal1",
        ),
        series[0],
        check_freq=False,
    )

def test_load_time_series_with_uppercase_signals_existing_case_sensitive(self):
    """With --case-sensitive-signals, an existing mixed-case signal matches the
    mixed-case column exactly, so no signal is created and the series is uploaded
    against the existing signal."""
    script = LoadTimeSeriesFromFile(
        common_args
        + [
            "--filename",
            "./exabel_data_sdk/tests/resources/data/timeseries_with_mixedcase_columns.csv",
            "--create-missing-signals",
            "--case-sensitive-signals",
        ]
    )
    self.client.signal_api.get_signal_iterator.side_effect = self._list_signal_mixedcase
    self.client.entity_api.get_entity_type_iterator.side_effect = self._list_entity_types
    script.run_script(self.client, script.parse_arguments())

    upsert_calls = self.client.time_series_api.bulk_upsert_time_series.call_args_list
    self.assertEqual(1, len(upsert_calls))
    # The signal already exists with the exact casing, so nothing is created.
    self.assertEqual(0, len(self.client.signal_api.create_signal.call_args_list))
    uploaded_series = upsert_calls[0][0][0]
    self.assertEqual(1, len(uploaded_series))

    dates = pd.date_range("2021-01-01", periods=5, tz=tz.tzutc())
    expected = pd.Series(
        [1, 2, 3, 4, 5],
        pd.MultiIndex.from_arrays([dates, dates]),
        name="entityTypes/brand/entities/ns.A_brand/signals/ns.Signal1",
    )
    pd.testing.assert_series_equal(expected, uploaded_series[0], check_freq=False)

def test_load_time_series_with_mixedcase_signals_existing_and_entity_type_nonexisting_cs(
    self,
):
    """With --case-sensitive-signals, an existing mixed-case signal matches even
    when the entity type from the file does not exist in upper case.

    Fix: the test passed ``--case-sensitive`` instead of ``--case-sensitive-signals``.
    That only worked through argparse's prefix abbreviation (allow_abbrev), which
    breaks silently as soon as another ``--case-sensitive-*`` option is added; use
    the full flag name, consistent with the sibling tests.
    """
    args = common_args + [
        "--filename",
        "./exabel_data_sdk/tests/resources/data/timeseries_with_mixedcase_columns.csv",
        "--create-missing-signals",
        "--case-sensitive-signals",
    ]
    script = LoadTimeSeriesFromFile(args)
    self.client.signal_api.get_signal_iterator.side_effect = self._list_signal_mixedcase
    self.client.entity_api.get_entity_type_iterator.side_effect = self._list_entity_types

    script.run_script(self.client, script.parse_arguments())

    call_args_list = self.client.time_series_api.bulk_upsert_time_series.call_args_list
    self.assertEqual(1, len(call_args_list))
    # The mixed-case signal already exists, so nothing is created.
    self.assertEqual(0, len(self.client.signal_api.create_signal.call_args_list))
    series = call_args_list[0][0][0]
    self.assertEqual(1, len(series))

    pd.testing.assert_series_equal(
        pd.Series(
            [1, 2, 3, 4, 5],
            pd.MultiIndex.from_arrays(
                [
                    pd.date_range("2021-01-01", periods=5, tz=tz.tzutc()),
                    pd.date_range("2021-01-01", periods=5, tz=tz.tzutc()),
                ]
            ),
            name="entityTypes/brand/entities/ns.A_brand/signals/ns.Signal1",
        ),
        series[0],
        check_freq=False,
    )

def _list_signal(self):
return iter(
[
Expand All @@ -709,6 +859,9 @@ def _list_signal(self):
def _list_signal_uppercase(self):
    """Return an iterator over a single upper-case signal, mimicking the signal API."""
    upper_case_signal = Signal(
        "signals/ns.SIGNAL1", "The Signal", "A description of the signal"
    )
    return iter((upper_case_signal,))

def _list_signal_mixedcase(self):
    """Return an iterator over a single mixed-case signal, mimicking the signal API."""
    mixed_case_signal = Signal(
        "signals/ns.Signal1", "The Signal", "A description of the signal"
    )
    return iter((mixed_case_signal,))

def _list_entity_types(self):
    """Return an iterator over a single lower-case "brand" entity type."""
    brand_type = EntityType("entityTypes/brand", "", "", False)
    return iter((brand_type,))

Expand Down

0 comments on commit 80ca08c

Please sign in to comment.