Skip to content

Commit

Permalink
Sanitize NSVCA values on modulemd YAML files
Browse files Browse the repository at this point in the history
- Replace yaml.safe_load by YAML loader that prevents unquoted numbers
  in NSVCA fields to loose trailing/heading zeros.
- Add unit test to `parse_modular` for the specific issue
- OBSOLETE_BY_STREAM attr was named as "obsoleted_by_module_name" instead
of "obsoleted_by_name_stream". Fix tests accordingly.
- Refactor parse_modular, docstrings and small type annotations

closes #3285

(cherry picked from commit 35ff2f8)
  • Loading branch information
pedro-psb authored and patchback[bot] committed Dec 12, 2023
1 parent 9fd01cf commit a2b0b30
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGES/3285.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added support for preventing unquoted NSVCA numerical values (e.g. ``"stream": 2.10``) of having zeros stripped on modulemd YAML files.
2 changes: 1 addition & 1 deletion pulp_rpm/app/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@
CONTEXT="module_context",
EOL="eol_date",
OBSOLETE_BY_MODULE="obsoleted_by_module_name",
OBSOLETE_BY_STREAM="obsoleted_by_module_name",
OBSOLETE_BY_STREAM="obsoleted_by_module_stream",
)

# Mandatory fields for Modulemd types
Expand Down
55 changes: 50 additions & 5 deletions pulp_rpm/app/modulemd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import tempfile
import yaml
import collections

from jsonschema import Draft7Validator
from gettext import gettext as _ # noqa:F401
Expand Down Expand Up @@ -61,13 +62,16 @@ def modules_packages(modules):
version.add_content(Package.objects.filter(pk__in=packages_to_add))


def split_modulemd_file(file):
def split_modulemd_file(file: str):
"""
Helper method to preserve original formatting of modulemd.
Args:
file: Absolute path to file
"""
with tempfile.TemporaryDirectory(dir=".") as tf:
decompressed_path = os.path.join(tf, "modulemd.yaml")
cr.decompress_file(file.path, decompressed_path, cr.AUTO_DETECT_COMPRESSION)
cr.decompress_file(file, decompressed_path, cr.AUTO_DETECT_COMPRESSION)
with open(decompressed_path) as modulemd_file:
for doc in modulemd_file.read().split("---"):
# strip any spaces or newlines from either side, strip the document end marking,
Expand All @@ -93,7 +97,7 @@ def check_mandatory_module_fields(module, required_fields):

def create_modulemd(modulemd, snippet):
"""
Create dict with modulemd data to can be saved to DB.
Create dict with modulemd data to be saved to DB.
"""
new_module = dict()
new_module[PULP_MODULE_ATTR.NAME] = modulemd["data"].get("name")
Expand Down Expand Up @@ -174,16 +178,19 @@ def create_modulemd_obsoletes(obsolete, snippet):
return new_obsolete


def parse_modular(file):
def parse_modular(file: str):
"""
Parse all modular metadata.
Args:
file: Absolute path to file
"""
modulemd_all = []
modulemd_defaults_all = []
modulemd_obsoletes_all = []

for module in split_modulemd_file(file):
parsed_data = yaml.safe_load(module)
parsed_data = yaml.load(module, Loader=ModularYamlLoader)
# here we check the modulemd document as we don't store all info, so serializers
# are not enough then we only need to take required data from dict which is
# parsed by pyyaml library
Expand All @@ -210,3 +217,41 @@ def parse_modular(file):
logging.warning(f"Unknown modular document type found: {parsed_data.get('document')}")

return modulemd_all, modulemd_defaults_all, modulemd_obsoletes_all


class ModularYamlLoader(yaml.SafeLoader):
"""
Custom Loader that preserve unquoted float in specific fields (see #3285).
Motivation (for customizing YAML parsing) is that libmodulemd also implement safe-quoting:
https://github.com/fedora-modularity/libmodulemd/blob/main/modulemd/tests/test-modulemd-quoting.c
This class is based on https://stackoverflow.com/a/74334992
"""

# Field to preserve (will bypass yaml casting)
PRESERVED_FIELDS = ("name", "stream", "version", "context", "arch")

def construct_mapping(self, node, deep=False):
if not isinstance(node, yaml.MappingNode):
raise yaml.constructor.ConstructorError(
None, None, "expected a mapping node, but found %s" % node.id, node.start_mark
)
mapping = {}
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
if not isinstance(key, collections.abc.Hashable):
raise yaml.constructor.ConstructorError(
"while constructing a mapping",
node.start_mark,
"found unhashable key",
key_node.start_mark,
)
if key in ModularYamlLoader.PRESERVED_FIELDS and isinstance(
value_node, yaml.ScalarNode
):
value = value_node.value
else:
value = self.construct_object(value_node, deep=deep)
mapping[key] = value
return mapping
9 changes: 7 additions & 2 deletions pulp_rpm/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,8 +973,13 @@ async def parse_repository_metadata(self, repomd, metadata_results):
await self.put(dc)

async def parse_modules_metadata(self, modulemd_result):
"""Parse modules' metadata which define what packages are built for specific releases."""
modulemd_all, defaults_all, obsoletes_all = parse_modular(modulemd_result)
"""
Parse modules' metadata which define what packages are built for specific releases.
Args:
modulemd_result(pulpcore.download.base.DownloadResult): downloaded modulemd file
"""
modulemd_all, defaults_all, obsoletes_all = parse_modular(modulemd_result.path)

modulemd_dcs = []

Expand Down
8 changes: 4 additions & 4 deletions pulp_rpm/tests/functional/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -1588,8 +1588,8 @@
"override_previous": None,
"module_context": "6c81f848",
"eol_date": None,
"obsoleted_by_module_name": "10",
"obsoleted_by_module_stream": None,
"obsoleted_by_module_name": "nodejs",
"obsoleted_by_module_stream": "10",
},
{
"modified": "2020-05-01T00:00Z",
Expand Down Expand Up @@ -1621,8 +1621,8 @@
"override_previous": None,
"module_context": None,
"eol_date": None,
"obsoleted_by_module_name": "5",
"obsoleted_by_module_stream": None,
"obsoleted_by_module_name": "nodejs",
"obsoleted_by_module_stream": "5",
},
]

Expand Down
131 changes: 131 additions & 0 deletions pulp_rpm/tests/unit/test_modulemd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from pulp_rpm.app.modulemd import parse_modular
import os

sample_file_data = """
---
document: modulemd
version: 2
data:
name: kangaroo
stream: 1.10
version: 20180730223407
context: deadbeef
static_context: true
arch: noarch
summary: Kangaroo 0.3 module
description: >-
A module for the kangaroo 0.3 package
license:
module:
- MIT
content:
- MIT
profiles:
default:
rpms:
- kangaroo
artifacts:
rpms:
- kangaroo-0:0.3-1.noarch
...
---
document: modulemd
version: 2
data:
name: kangaroo
stream: "1.10"
version: 20180704111719
context: deadbeef
static_context: false
arch: noarch
summary: Kangaroo 0.2 module
description: >-
A module for the kangaroo 0.2 package
license:
module:
- MIT
content:
- MIT
profiles:
default:
rpms:
- kangaroo
artifacts:
rpms:
- kangaroo-0:0.2-1.noarch
...
---
document: modulemd-defaults
version: 1
data:
module: avocado
modified: 202002242100
profiles:
"5.30": [default]
"stream": [default]
...
---
document: modulemd-obsoletes
version: 1
data:
modified: 2022-01-24T08:54Z
module: perl
stream: 5.30
eol_date: 2021-06-01T00:00Z
message: Module stream perl:5.30 is no longer supported. Please switch to perl:5.32
obsoleted_by:
module: perl
stream: 5.40
...
"""


def test_parse_modular_preserves_literal_unquoted_values_3285(tmp_path):
"""Unquoted yaml numbers with trailing/leading zeros are preserved on specific fields"""

# write data to test_file
os.chdir(tmp_path)
file_name = "modulemd.yaml"
with open(file_name, "w") as file:
file.write(sample_file_data)

all, defaults, obsoletes = parse_modular(file_name)

# check normal, defaults and obsoletes modulemds
kangoroo1 = all[0] # unquoted
kangoroo2 = all[1] # quoted
modulemd_defaults = defaults[0]
modulemd_obsoletes = obsoletes[0]

assert kangoroo1["name"] == "kangaroo"
assert kangoroo1["stream"] == "1.10" # should not be 1.1
assert kangoroo1["version"] == "20180730223407"
assert kangoroo1["context"] == "deadbeef"
assert kangoroo1["static_context"] is True
assert kangoroo1["arch"] == "noarch"

assert kangoroo2["name"] == "kangaroo"
assert kangoroo2["stream"] == "1.10"
assert kangoroo2["version"] == "20180704111719"
assert kangoroo2["context"] == "deadbeef"
assert kangoroo2["static_context"] is False
assert kangoroo2["arch"] == "noarch"

# 'stream' keys which have non-scalar values (e.g. list) are parsed normally.
# Otherwise, weird results are produced (internal pyyaml objects)
assert modulemd_defaults["module"] == "avocado"
assert modulemd_defaults.get("modified") is None # not present
assert modulemd_defaults["profiles"]["stream"] == ["default"]
assert modulemd_defaults["profiles"]["5.30"] == ["default"]

# parse_modular changes the structure and key names for obsoletes
assert modulemd_obsoletes["modified"] == "2022-01-24T08:54Z"
assert modulemd_obsoletes["module_name"] == "perl"
assert modulemd_obsoletes["module_stream"] == "5.30"
assert modulemd_obsoletes["eol_date"] == "2021-06-01T00:00Z"
assert (
modulemd_obsoletes["message"]
== "Module stream perl:5.30 is no longer supported. Please switch to perl:5.32"
)
assert modulemd_obsoletes["obsoleted_by_module_name"] == "perl"
assert modulemd_obsoletes["obsoleted_by_module_stream"] == "5.40"

0 comments on commit a2b0b30

Please sign in to comment.