From 39b95707aeb2f68259b709ccb01572c431748a1c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 22 Nov 2024 19:36:05 -0500 Subject: [PATCH 1/2] add workaround patch of enum with colon failing CWL validation (relates to https://github.com/common-workflow-language/cwltool/issues/2071) --- CHANGES.rst | 5 ++ tests/processes/test_convert.py | 90 +++++++++++++++++++++++++++++++-- weaver/formats.py | 2 +- weaver/processes/convert.py | 48 +++++++++++++++--- 4 files changed, 134 insertions(+), 11 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e2afd597a..b249e3174 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -65,6 +65,11 @@ Changes: Fixes: ------ +- Fix `CWL` ``enum`` type mishandling ``symbols`` containing a colon (``:``) character (e.g.: a list of allowed times) + leading to their invalid interpretation as namespaced strings (i.e.: ``:``), in turn failing validation + and breaking the resulting `CWL`. Such ``enum`` will be patched with updated ``symbols`` prefixed by ``#`` to respect + the expected URI representation of ``enum`` values by the `CWL` parser (relates to + `common-workflow-language/cwltool#2071 `_). - Fix `CWL` conversion from a `OGC API - Processes` definition specifying an `I/O` with ``schema`` explicitly indicating a ``type: array`` and nested ``enum``, even if ``minOccurs: 1`` is omitted or explicitly set. - Fix ``url`` parameter to override the `CLI` internal ``url`` when passed explicitly to the invoked operation. diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index 94e1e3a0e..f24bd2c1b 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -2236,29 +2236,34 @@ def test_ogcapi2cwl_process_without_extra(): @pytest.mark.parametrize( - ["input_str", "input_int", "input_float"], + ["input_str", "input_int", "input_float", "input_time"], [ # OpenAPI schema references ( {"schema": {"type": "string", "enum": ["a", "b", "c"]}}, {"schema": {"type": "integer", "enum": [1, 2, 3]}}, {"schema": {"type": "number", "format": "float", "enum": [1.2, 3.4]}}, + {"schema": {"type": "string", "format": "time", "enum": ["12:00", "24:00"]}}, ), # OGC-API input definitions ( {"data_type": "string", "allowed_values": ["a", "b", "c"]}, {"data_type": "integer", "allowed_values": [1, 2, 3]}, {"data_type": "float", "allowed_values": [1.2, 3.4]}, + {"data_type": "string", "allowed_values": ["12:00", "24:00"]}, ), ] ) -def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float): +def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float, input_time): """ - Test that a :term:`CWL` with pseudo-``Enum`` type has the necessary :term:`CWL` requirements to perform validation. + Test that a :term:`CWL` with pseudo-``enum`` type has the necessary :term:`CWL` requirements to perform validation. .. seealso:: - :func:`test_any2cwl_io_enum_convert` - :func:`test_any2cwl_io_enum_validate` + - :func:`weaver.processes.convert._convert_cwl_io_enum` + - :func:`weaver.processes.convert._get_cwl_js_value_from` + - :func:`weaver.processes.convert._patch_cwl_enum_js_requirement` """ href = "https://remote-server.com/processes/test-process" body = { @@ -2266,6 +2271,7 @@ def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float): "enum-str": input_str, "enum-int": input_int, "enum-float": input_float, + "enum-time": input_time, }, "outputs": { "output": {"schema": {"type": "string", "contentMediaType": ContentType.TEXT_PLAIN}}, @@ -2295,6 +2301,84 @@ def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float): assert "values.includes(self)" in cwl_value_from assert "self.every(item => values.includes(item))" not in cwl_value_from + assert cwl["inputs"]["enum-time"]["type"] == {"type": "enum", "symbols": ["#12:00", "#24:00"]} + assert "inputBinding" not in cwl["inputs"]["enum-time"] + + +@pytest.mark.parametrize( + ["cwl_io", "wps_io_expected"], + [ + ( + { + "name": "test", + "type": "enum", + "symbols": ["#12:00", "#24:00"], + }, + { + "id": "test", + "allowed_values": ["12:00", "24:00"], + } + ), + ( + { + "name": "test", + "type": { + "type": "enum", + "symbols": ["#12:00", "#24:00"], + } + }, + { + "id": "test", + "allowed_values": ["12:00", "24:00"], + } + ), + ( + { + "name": "test", + "type": { + "type": "array", + "items": { + "type": "enum", + "symbols": ["#12:00", "#24:00"], + } + } + }, + { + "id": "test", + "allowed_values": ["12:00", "24:00"], + "min_occurs": 1, + } + ), + ( + { + "name": "test", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "enum", + "symbols": ["#12:00", "#24:00"], + } + } + ] + }, + { + "id": "test", + "allowed_values": ["12:00", "24:00"], + "min_occurs": 1, + } + ) + ] +) +def test_patched_cwl_enum_colon_back_conversion(cwl_io, wps_io_expected): + """ + Given a :term:`CWL` ``enum`` that contains ``:`` characters patched with prefixed ``#`` test the inverse conversion. + """ + wps_io = cwl2wps_io(cwl_io, IO_INPUT) + wps_allow = [allow.value for allow in wps_io.allowed_values] + assert wps_allow == ["12:00", "24:00"] + @mocked_remote_server_requests_wps1([ resources.TEST_REMOTE_SERVER_URL, diff --git a/weaver/formats.py b/weaver/formats.py index 133512880..cddc7dcf9 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -828,7 +828,7 @@ def get_cwl_file_format(media_type, make_reference=False, must_exist=True, allow ``must_exist=False`` before providing it to the `CWL` I/O definition. Setting ``must_exist=False`` should be used only for literal string comparison or pre-processing steps to evaluate formats. - :param media_type: Some reference, namespace'd or literal (possibly extended) media-type string. + :param media_type: Some reference, namespaced or literal (possibly extended) media-type string. :param make_reference: Construct the full URL reference to the resolved media-type. Otherwise, return tuple details. :param must_exist: Return result only if it can be resolved to an official media-type (or synonym if enabled), otherwise ``None``. diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 0d0caf370..864c112f1 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -857,10 +857,12 @@ def _get_cwl_js_value_from(cwl_io_symbols, allow_unique, allow_array): def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, allow_array): # type: (Union[str, Type[null]], List[AnyValueType], IO_Select_Type, bool, bool) -> CWL_IO_Type """ - Converts the :term:`I/O` definition to a :term:`CWL` :term:`I/O` that allows ``Enum``-like functionality. + Converts the :term:`I/O` definition to a :term:`CWL` :term:`I/O` that allows ``enum``-like functionality. In the event of an explicit ``string`` as base type, :term:`CWL` directly supports ``type: enum``. Other basic - types are not directly supported, and must instead perform manual validation against the set of allowed values. + types are not directly supported, and must instead perform manual validation against the set of allowed values, + using JavaScript evaluation applied by ``valueFrom`` against the the submitted values to replicate the automatic + behavior performed by ``enum`` type. .. seealso:: - https://github.com/common-workflow-language/cwl-v1.2/issues/267 @@ -871,15 +873,35 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a Because ``valueFrom`` can only be used with ``inputBinding``, any output providing a set of allowed values that are not ``string``-based will be ignored when converted to :term:`CWL` :term:`I/O`. + .. seealso:: + - :func:`_get_cwl_js_value_from` defines the ``enum``-like ``valueFrom`` checks + - :func:`_patch_cwl_enum_js_requirement` must be called on the entire :term:`CWL` to inject the + relevant :data:`CWL_REQUIREMENT_INLINE_JAVASCRIPT`, since it might not be defined in the original :term:`CWL`. + + Another edge-case that happens even if the base type is a ``string`` is when the enum ``symbols`` + contain an entry with a ``:`` character (e.g.: as in the case of a time ``HH:MM`` string). + In such case, :mod:`schema_salad` incorrectly parses it as if a namespaced :term:`URL` (i.e.: ``:``) + was specified. Because of this, the symbol strings get extended to an unresolvable namespace, which leads to a + partial and incorrect enum value (e.g.: ``abc:def`` becomes ``input-id#def``). This causes the resulting ``enum`` + to never accept the submitted values, or worst, causes the entire :term:`CWL` to fail validation if duplicate + values are obtained (e.g.: ``12:00`` and ``24:00`` both extend to ``input-id#00``, causing a duplicate ``00``). + To handle this case, the ``symbols`` can be updated with a prefixed ``#`` character, making :mod:`schema_salad` + interpret it as if it was already a :term:`URI` relative to the input, which is what it aims to generate, and + therefore interprets the rest of the string (including the ``:``) literally. + + .. seealso:: + - https://github.com/common-workflow-language/cwltool/issues/2071 + :param cwl_io_type: Basic type for which allowed values should apply. :param cwl_io_symbols: Allowed values to restrict the :term:`I/O` definition. :return: Converted definition as CWL Enum or with relevant value validation as applicable for the type. """ if cwl_io_type not in PACKAGE_BASIC_TYPES: return {} - if cwl_io_type == "string": + any_colon_symbol = any(":" in str(symbol) for symbol in cwl_io_symbols) + if cwl_io_type == "string" and not any_colon_symbol: return {"type": {"type": PACKAGE_ENUM_BASE, "symbols": cwl_io_symbols}} - if cwl_io_type not in PACKAGE_NUMERIC_TYPES: + if cwl_io_type not in PACKAGE_NUMERIC_TYPES and not (cwl_io_type == "string" and any_colon_symbol): LOGGER.warning( "Could not resolve conversion of CWL I/O as Enum for type '%s'. " "Ignoring value validation against specified allowed values: %s.", @@ -889,9 +911,10 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a return {"type": cwl_io_type} if not ( - (all(isinstance(value, bool) for value in cwl_io_symbols) and cwl_io_type == "boolean") or - (all(isinstance(value, int) for value in cwl_io_symbols) and cwl_io_type in PACKAGE_INTEGER_TYPES) or - (all(isinstance(value, float) for value in cwl_io_symbols) and cwl_io_type in PACKAGE_FLOATING_TYPES) + (cwl_io_type == "string" and all(isinstance(value, str) for value in cwl_io_symbols)) or + (cwl_io_type == "boolean" and all(isinstance(value, bool) for value in cwl_io_symbols)) or + (cwl_io_type in PACKAGE_INTEGER_TYPES and all(isinstance(value, int) for value in cwl_io_symbols)) or + (cwl_io_type in PACKAGE_FLOATING_TYPES and all(isinstance(value, float) for value in cwl_io_symbols)) ): LOGGER.warning( "Incompatible CWL I/O type '%s' detected for specified allowed values: %s. " @@ -901,6 +924,10 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a ) cwl_io_type = "Any" + if any_colon_symbol: + cwl_io_symbols = [f"#{symbol}" for symbol in cwl_io_symbols] + return {"type": cwl_io_type, "symbols": cwl_io_symbols} + if io_select != IO_INPUT: return {"type": cwl_io_type} @@ -1391,6 +1418,13 @@ def parse_cwl_enum_type(io_info): f"Unsupported I/O 'enum' base type: `{type(first_allow)!s}`, from definition: `{io_info!r}`." ) + if io_type == "string": + # un-patch enum causing CWL parsing errors caused by ':' (see _convert_cwl_io_enum) + io_allow = [ + allow[1:] if allow.startswith("#") and ":" in allow else allow + for allow in io_allow + ] + io_def = CWLIODefinition( type=io_type, # type: ignore enum=True, From c12bc6347df37c3f2d38e91e32127b5b29638ca8 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Sat, 23 Nov 2024 01:07:43 -0500 Subject: [PATCH 2/2] fix CWL enum type invalid --- tests/processes/test_convert.py | 2 +- weaver/processes/convert.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index f24bd2c1b..839876404 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -2301,7 +2301,7 @@ def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float, assert "values.includes(self)" in cwl_value_from assert "self.every(item => values.includes(item))" not in cwl_value_from - assert cwl["inputs"]["enum-time"]["type"] == {"type": "enum", "symbols": ["#12:00", "#24:00"]} + assert cwl["inputs"]["enum-time"] == {"type": "enum", "symbols": ["#12:00", "#24:00"]} assert "inputBinding" not in cwl["inputs"]["enum-time"] diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 864c112f1..967be596a 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -926,7 +926,7 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a if any_colon_symbol: cwl_io_symbols = [f"#{symbol}" for symbol in cwl_io_symbols] - return {"type": cwl_io_type, "symbols": cwl_io_symbols} + return {"type": PACKAGE_ENUM_BASE, "symbols": cwl_io_symbols} if io_select != IO_INPUT: return {"type": cwl_io_type}