From b352fd3391954db9d752ca575a87eaae029e3af4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Fri, 30 Aug 2024 19:05:27 -0600 Subject: [PATCH 1/3] feat: Environment variables are now parsed for boolean, integer, array and object setting values --- poetry.lock | 17 +++- pyproject.toml | 1 + singer_sdk/configuration/_dict_config.py | 45 ++++++--- tests/core/configuration/test_dict_config.py | 97 ++++++++++++++------ 4 files changed, 117 insertions(+), 43 deletions(-) diff --git a/poetry.lock b/poetry.lock index cbe1d81a5..671223076 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1572,6 +1572,21 @@ files = [ [package.dependencies] pytest = ">=3.0.0" +[[package]] +name = "pytest-subtests" +version = "0.13.1" +description = "unittest subTest() support and subtests fixture" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest_subtests-0.13.1-py3-none-any.whl", hash = "sha256:ab616a22f64cd17c1aee65f18af94dbc30c444f8683de2b30895c3778265e3bd"}, + {file = "pytest_subtests-0.13.1.tar.gz", hash = "sha256:989e38f0f1c01bc7c6b2e04db7d9fd859db35d77c2c1a430c831a70cbf3fde2d"}, +] + +[package.dependencies] +attrs = ">=19.2.0" +pytest = ">=7.0" + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -2592,4 +2607,4 @@ testing = ["pytest"] [metadata] lock-version = "2.0" python-versions = ">=3.8" -content-hash = "34a80dcb1e59005ff2d6e534b2fed2cf683cf8c0b4a54ce5af6e779cf07090d0" +content-hash = "cf19d810b0e586f811a304ae23bdd21d76e0f5d6ab0fb382afa5375b6f9518d3" diff --git a/pyproject.toml b/pyproject.toml index bde6e8519..0c84e3026 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,6 +128,7 @@ duckdb-engine = { version = ">=0.9.4", python = "<4" } fastjsonschema = ">=2.19.1" pytest-benchmark = ">=4.0.0" pytest-snapshot = ">=0.9.0" +pytest-subtests = ">=0.13.1" pytz = ">=2022.2.1" requests-mock = ">=1.10.0" rfc3339-validator = ">=0.1.4" diff --git a/singer_sdk/configuration/_dict_config.py b/singer_sdk/configuration/_dict_config.py index fd8217f01..81eb5565e 100644 --- a/singer_sdk/configuration/_dict_config.py +++ b/singer_sdk/configuration/_dict_config.py @@ -5,16 +5,27 @@ import logging import os import typing as t +import warnings from pathlib import Path from dotenv import find_dotenv from dotenv.main import DotEnv -from singer_sdk.helpers._typing import is_string_array_type -from singer_sdk.helpers._util import read_json_file +from singer_sdk.helpers import _typing +from singer_sdk.helpers._util import load_json, read_json_file logger = logging.getLogger(__name__) +TRUTHY = ("true", "1", "yes", "on") + + +def _parse_array(value: str) -> list[str]: + return load_json(value) # type: ignore[return-value] + + +def _legacy_parse_array_of_strings(value: str) -> list[str]: + return value.split(",") + def parse_environment_config( config_schema: dict[str, t.Any], @@ -29,9 +40,6 @@ def parse_environment_config( dotenv_path: Path to a .env file. If None, will try to find one in increasingly higher folders. - Raises: - ValueError: If an un-parsable setting is found. - Returns: A configuration dictionary. """ @@ -43,7 +51,7 @@ def parse_environment_config( logger.debug("Loading configuration from %s", dotenv_path) DotEnv(dotenv_path).set_as_environment_variables() - for config_key in config_schema["properties"]: + for config_key, schema in config_schema.get("properties", {}).items(): env_var_name = prefix + config_key.upper().replace("-", "_") if env_var_name in os.environ: env_var_value = os.environ[env_var_name] @@ -52,15 +60,24 @@ def parse_environment_config( config_key, env_var_name, ) - if is_string_array_type(config_schema["properties"][config_key]): - if env_var_value[0] == "[" and env_var_value[-1] == "]": - msg = ( - "A bracketed list was detected in the environment variable " - f"'{env_var_name}'. This syntax is no longer supported. Please " - "remove the brackets and try again." + if _typing.is_integer_type(schema): + result[config_key] = int(env_var_value) + elif _typing.is_boolean_type(schema): + result[config_key] = env_var_value.lower() in TRUTHY + elif _typing.is_string_array_type(schema): + try: + result[config_key] = _parse_array(env_var_value) + except Exception: # noqa: BLE001 + warnings.warn( + "Parsing array from deprecated string format 'x,y,z'", + DeprecationWarning, + stacklevel=2, ) - raise ValueError(msg) - result[config_key] = env_var_value.split(",") + result[config_key] = _legacy_parse_array_of_strings(env_var_value) + elif _typing.is_array_type(schema): + result[config_key] = _parse_array(env_var_value) + elif _typing.is_object_type(schema): + result[config_key] = load_json(env_var_value) else: result[config_key] = env_var_value return result diff --git a/tests/core/configuration/test_dict_config.py b/tests/core/configuration/test_dict_config.py index 136b72e72..42f2a8fb4 100644 --- a/tests/core/configuration/test_dict_config.py +++ b/tests/core/configuration/test_dict_config.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import typing as t from pathlib import Path import pytest @@ -11,10 +12,23 @@ parse_environment_config, ) +if t.TYPE_CHECKING: + from pytest_subtests import SubTests + CONFIG_JSONSCHEMA = th.PropertiesList( th.Property("prop1", th.StringType, required=True), th.Property("prop2", th.StringType), th.Property("prop3", th.ArrayType(th.StringType)), + th.Property("prop4", th.IntegerType), + th.Property("prop5", th.BooleanType), + th.Property("prop6", th.ArrayType(th.IntegerType)), + th.Property( + "prop7", + th.ObjectType( + th.Property("sub_prop1", th.StringType), + th.Property("sub_prop2", th.IntegerType), + ), + ), ).to_dict() @@ -36,30 +50,66 @@ def config_file2(tmpdir) -> str: return filepath -def test_get_env_var_config(monkeypatch: pytest.MonkeyPatch): +def test_get_env_var_config(monkeypatch: pytest.MonkeyPatch, subtests: SubTests): """Test settings parsing from environment variables.""" with monkeypatch.context() as m: m.setenv("PLUGIN_TEST_PROP1", "hello") - m.setenv("PLUGIN_TEST_PROP3", "val1,val2") - m.setenv("PLUGIN_TEST_PROP4", "not-a-tap-setting") + m.setenv("PLUGIN_TEST_PROP3", '["val1","val2"]') + m.setenv("PLUGIN_TEST_PROP4", "123") + m.setenv("PLUGIN_TEST_PROP5", "TRUE") + m.setenv("PLUGIN_TEST_PROP6", "[1,2,3]") + m.setenv("PLUGIN_TEST_PROP7", '{"sub_prop1": "hello", "sub_prop2": 123}') + m.setenv("PLUGIN_TEST_PROP999", "not-a-tap-setting") env_config = parse_environment_config(CONFIG_JSONSCHEMA, "PLUGIN_TEST_") - assert env_config["prop1"] == "hello" - assert env_config["prop3"] == ["val1", "val2"] - assert "PROP1" not in env_config - assert "prop2" not in env_config - assert "PROP2" not in env_config - assert "prop4" not in env_config - assert "PROP4" not in env_config + + with subtests.test(msg="Parse string from environment"): + assert env_config["prop1"] == "hello" + + with subtests.test(msg="Parse array from environment"): + assert env_config["prop3"] == ["val1", "val2"] + + with subtests.test(msg="Parse integer from environment"): + assert env_config["prop4"] == 123 + + with subtests.test(msg="Parse boolean from environment"): + assert env_config["prop5"] is True + + with subtests.test(msg="Parse array of integers from environment"): + assert env_config["prop6"] == [1, 2, 3] + + with subtests.test(msg="Parse object from environment"): + assert env_config["prop7"] == {"sub_prop1": "hello", "sub_prop2": 123} + + with subtests.test(msg="Ignore non-tap setting"): + missing_props = {"PROP1", "prop2", "PROP2", "prop999", "PROP999"} + assert not set.intersection(missing_props, env_config) + + m.setenv("PLUGIN_TEST_PROP3", "val1,val2") + with subtests.test(msg="Legacy array parsing"), pytest.warns( + DeprecationWarning + ): + parsed = parse_environment_config(CONFIG_JSONSCHEMA, "PLUGIN_TEST_") + assert parsed["prop3"] == ["val1", "val2"] no_env_config = parse_environment_config(CONFIG_JSONSCHEMA, "PLUGIN_TEST_") - assert "prop1" not in no_env_config - assert "PROP1" not in env_config - assert "prop2" not in no_env_config - assert "PROP2" not in env_config - assert "prop3" not in no_env_config - assert "PROP3" not in env_config - assert "prop4" not in no_env_config - assert "PROP4" not in env_config + missing_props = { + "prop1", + "PROP1", + "prop2", + "PROP2", + "prop3", + "PROP3", + "prop4", + "PROP4", + "prop5", + "PROP5", + "prop6", + "PROP6", + "prop999", + "PROP999", + } + with subtests.test(msg="Ignore missing environment variables"): + assert not set.intersection(missing_props, no_env_config) def test_get_dotenv_config(tmp_path: Path): @@ -74,15 +124,6 @@ def test_get_dotenv_config(tmp_path: Path): assert dotenv_config["prop1"] == "hello" -def test_get_env_var_config_not_parsable(monkeypatch: pytest.MonkeyPatch): - """Test settings parsing from environment variables with a non-parsable value.""" - with monkeypatch.context() as m: - m.setenv("PLUGIN_TEST_PROP1", "hello") - m.setenv("PLUGIN_TEST_PROP3", '["repeated"]') - with pytest.raises(ValueError, match="A bracketed list was detected"): - parse_environment_config(CONFIG_JSONSCHEMA, "PLUGIN_TEST_") - - def test_merge_config_sources( config_file1, config_file2, @@ -91,7 +132,7 @@ def test_merge_config_sources( """Test merging multiple configuration sources.""" with monkeypatch.context() as m: m.setenv("PLUGIN_TEST_PROP1", "from-env") - m.setenv("PLUGIN_TEST_PROP4", "not-a-tap-setting") + m.setenv("PLUGIN_TEST_PROP999", "not-a-tap-setting") config = merge_config_sources( [config_file1, config_file2, "ENV"], CONFIG_JSONSCHEMA, From b734cab826c6cfc0381e7866947a5c035e0ddd29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Wed, 16 Oct 2024 23:06:12 -0600 Subject: [PATCH 2/3] Add docs --- docs/implementation/cli.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/implementation/cli.md b/docs/implementation/cli.md index 94077fc5d..92d77fa37 100644 --- a/docs/implementation/cli.md +++ b/docs/implementation/cli.md @@ -68,9 +68,19 @@ values from environment variables, and from a `.env` file if present within the working directory, which match the exact name of a setting, along with a prefix determined by the plugin name. -> For example: For a sample plugin named `tap-my-example` and settings named "username" and "access_key", the SDK will automatically scrape -> the settings from environment variables `TAP_MY_EXAMPLE_USERNAME` and -> `TAP_MY_EXAMPLE_ACCESS_KEY`, if they exist. +```{note} +For example, for a sample plugin named `tap-my-example` and settings named `username` and `access_key`, the SDK will automatically scrape +the settings from environment variables `TAP_MY_EXAMPLE_USERNAME` and +`TAP_MY_EXAMPLE_ACCESS_KEY` respectively, if they exist. +``` + +The following value types are automatically cast to the appropriate Python type: + +- integer (e.g. `TAP_MY_EXAMPLE_PORT=5432`) +- boolean (e.g. `TAP_MY_EXAMPLE_DEBUG=true`) +- JSON arrays (e.g. `TAP_MY_EXAMPLE_ARRAY='["a", "b", "c"]'`) +- JSON objects (e.g. `TAP_MY_EXAMPLE_OBJECT='{"key": "value"}'`) + ## Tap-Specific CLI Options From b7b50a3975ba4733fc193694209a028637756da9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Wed, 16 Oct 2024 23:14:33 -0600 Subject: [PATCH 3/3] Do not deprecate just yet --- singer_sdk/configuration/_dict_config.py | 10 +++++----- tests/core/configuration/test_dict_config.py | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/singer_sdk/configuration/_dict_config.py b/singer_sdk/configuration/_dict_config.py index 81eb5565e..af1d19f29 100644 --- a/singer_sdk/configuration/_dict_config.py +++ b/singer_sdk/configuration/_dict_config.py @@ -5,7 +5,6 @@ import logging import os import typing as t -import warnings from pathlib import Path from dotenv import find_dotenv @@ -68,10 +67,11 @@ def parse_environment_config( try: result[config_key] = _parse_array(env_var_value) except Exception: # noqa: BLE001 - warnings.warn( - "Parsing array from deprecated string format 'x,y,z'", - DeprecationWarning, - stacklevel=2, + # TODO(edgarrmondragon): Make this a deprecation warning. + # https://github.com/meltano/sdk/issues/2724 + logger.warning( + "Parsing array of the form 'x,y,z' is deprecated and will be " + "removed in future versions.", ) result[config_key] = _legacy_parse_array_of_strings(env_var_value) elif _typing.is_array_type(schema): diff --git a/tests/core/configuration/test_dict_config.py b/tests/core/configuration/test_dict_config.py index 42f2a8fb4..851f9970a 100644 --- a/tests/core/configuration/test_dict_config.py +++ b/tests/core/configuration/test_dict_config.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import logging import typing as t from pathlib import Path @@ -50,7 +51,11 @@ def config_file2(tmpdir) -> str: return filepath -def test_get_env_var_config(monkeypatch: pytest.MonkeyPatch, subtests: SubTests): +def test_get_env_var_config( + monkeypatch: pytest.MonkeyPatch, + subtests: SubTests, + caplog: pytest.LogCaptureFixture, +): """Test settings parsing from environment variables.""" with monkeypatch.context() as m: m.setenv("PLUGIN_TEST_PROP1", "hello") @@ -85,12 +90,17 @@ def test_get_env_var_config(monkeypatch: pytest.MonkeyPatch, subtests: SubTests) assert not set.intersection(missing_props, env_config) m.setenv("PLUGIN_TEST_PROP3", "val1,val2") - with subtests.test(msg="Legacy array parsing"), pytest.warns( - DeprecationWarning + with subtests.test(msg="Legacy array parsing"), caplog.at_level( + logging.WARNING, ): parsed = parse_environment_config(CONFIG_JSONSCHEMA, "PLUGIN_TEST_") assert parsed["prop3"] == ["val1", "val2"] + assert any( + "Parsing array of the form 'x,y,z'" in log.message + for log in caplog.records + ) + no_env_config = parse_environment_config(CONFIG_JSONSCHEMA, "PLUGIN_TEST_") missing_props = { "prop1",