Skip to content

Commit

Permalink
Validate signal names before performing API calls (#47)
Browse files Browse the repository at this point in the history
* Validate signal names before performing API calls
  • Loading branch information
aksestok authored Oct 26, 2021
1 parent f7786d5 commit f3a656e
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 11 deletions.
3 changes: 1 addition & 2 deletions VERSION
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
0.0.25

0.0.26
36 changes: 34 additions & 2 deletions exabel_data_sdk/scripts/load_time_series_from_csv.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import argparse
import re
import sys
from typing import Sequence
from typing import List, Sequence

import pandas as pd
from dateutil import tz

from exabel_data_sdk import ExabelClient
from exabel_data_sdk.client.api.data_classes.signal import Signal
from exabel_data_sdk.scripts.csv_script import CsvScript
from exabel_data_sdk.util.resource_name_normalization import to_entity_resource_names
from exabel_data_sdk.util.resource_name_normalization import (
to_entity_resource_names,
validate_signal_name,
)


class LoadTimeSeriesFromCsv(CsvScript):
Expand Down Expand Up @@ -74,6 +78,34 @@ def run_script(self, client: ExabelClient, args: argparse.Namespace) -> None:

print("Loading signals", ", ".join(str(s) for s in signals), "...")

# validate signal names
missing_header_pattern = re.compile(r"^Unnamed: ([0-9]+)$")
missing_headers: List[str] = []
invalid_signals: List[str] = []
for signal in signals:
try:
validate_signal_name(signal)
except ValueError:
# Pandas eats up any blank column names and replaces them with "Unnamed: N". Since
# this is invalid but not the actual column name, we give the end user a more
# precise error message
missing_header_match = missing_header_pattern.match(signal)
if missing_header_match:
missing_headers.append(missing_header_match.group(1))
else:
invalid_signals.append(signal)
if invalid_signals or missing_headers:
print(
"Encountered invalid signal names. Signal names must start with a letter, "
"and can only consist of letters, numbers, and underscore (_), and be "
"at most 64 characters"
)
if invalid_signals:
print(f"Invalid signal names: {', '.join(invalid_signals)}")
if missing_headers:
print(f"The following column(s) are missing headers: {', '.join(missing_headers)}")
sys.exit(1)

prefix = "signals/"
if args.namespace:
prefix += args.namespace + "."
Expand Down
38 changes: 31 additions & 7 deletions exabel_data_sdk/tests/scripts/test_load_time_series_from_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,9 @@

from exabel_data_sdk import ExabelClient
from exabel_data_sdk.scripts.load_time_series_from_csv import LoadTimeSeriesFromCsv
from exabel_data_sdk.util.resource_name_normalization import validate_signal_name

common_args = [
"script-name",
"--sep",
";",
"--api-key",
"123",
]
common_args = ["script-name", "--sep", ";", "--api-key", "123"]


class TestUploadTimeSeries(unittest.TestCase):
Expand Down Expand Up @@ -207,6 +202,35 @@ def test_read_file_with_integer_identifiers(self):
check_freq=False,
)

def test_should_fail_with_invalid_signal_names(self):
    """Check that each illegal signal name is rejected with its specific error message."""
    expected_failures = [
        (
            "0_starts_with_0",
            "Signal name must start with a letter, "
            'contain only letters, numbers, and underscores, but got "0_starts_with_0"',
        ),
        (
            "contains_!llegal_chars",
            "Signal name must start with a letter, "
            'contain only letters, numbers, and underscores, but got "contains_!llegal_chars"',
        ),
        ("", "Signal name cannot be empty"),
        (
            "signal_with_sixty_five_characters_in_length_which_more_than_max__",
            "Signal name "
            "cannot be longer than 64 characters, but got "
            '"signal_with_sixty_five_characters_in_length_which_more_than_max__"',
        ),
    ]

    for bad_name, expected_message in expected_failures:
        with self.assertRaises(ValueError) as raised:
            validate_signal_name(bad_name)
        # The message is compared verbatim so that wording changes are caught as well.
        self.assertEqual(str(raised.exception), expected_message)

def test_valid_signal_names(self):
    """Check that legal signal names pass validation without raising."""
    accepted_names = (
        "signal",
        "SIGNAL",
        "signal_with_underscores",
        "signal_1_with_underscores_and_numbers",
        "signal_with_sixty_four_characters_in_length_which_is_the_maximum",
    )

    # validate_signal_name signals failure by raising, so a clean pass over all names is the
    # whole assertion.
    for accepted in accepted_names:
        validate_signal_name(accepted)


if __name__ == "__main__":
unittest.main()
16 changes: 16 additions & 0 deletions exabel_data_sdk/util/resource_name_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,19 @@ def to_entity_resource_names(
result = identifiers.map(mapping)
result.name = "entity"
return result


def validate_signal_name(name: str) -> None:
    """
    Validate that the given signal name is a legal signal name. A signal name is a string that
    starts with an ASCII letter, contains only ASCII letters, digits, and underscores, and is
    at most 64 characters long.

    Raises a ValueError describing the problem if the name is empty, too long, or contains
    characters outside the allowed set. Returns None when the name is valid.
    """
    if not name:
        raise ValueError("Signal name cannot be empty")
    if len(name) > 64:
        raise ValueError(f'Signal name cannot be longer than 64 characters, but got "{name}"')
    # fullmatch with an explicit ASCII character class is used on purpose: a "$" anchor also
    # matches just before a trailing newline, and "\w" accepts non-ASCII letters and digits
    # when matching str patterns.
    if not re.fullmatch(r"[a-zA-Z][a-zA-Z0-9_]{0,63}", name):
        raise ValueError(
            "Signal name must start with a letter, contain only letters, "
            f'numbers, and underscores, but got "{name}"'
        )

0 comments on commit f3a656e

Please sign in to comment.