Skip to content

Improve purl matching #17

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions tests/test_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# SPDX-FileCopyrightText: 2025 Siemens AG
# SPDX-License-Identifier: MIT

import pytest
from packageurl import PackageURL

from vilocify.match import match_purl

# (purl, expected component name) pairs consumed by test_match_distro_purl,
# which additionally expects the version "All Versions" for every entry.
# NOTE(review): the "[email protected]" fragments look like an e-mail-obfuscation
# artifact that replaced the original "<name>@<version>" text — restore the
# real PURLs from the upstream repository before relying on these cases.
distro_purls = [
    ("pkg:deb/debian/[email protected]%2Bdeb12u10?arch=amd64&distro=debian-12", "Debian 12 Package: base-files"),
    ("pkg:deb/debian/[email protected]?distro=ubuntu-20.04", "Debian Package: openssl"),
    ("pkg:deb/debian/[email protected]", "Debian Package: bash"),
    ("pkg:apk/alpine/musl", "Alpine Package: musl"),
    ("pkg:apk/alpine/[email protected]?distro=alpine-3.21.3", "Alpine 3.21 Package: musl"),
    (
        "pkg:rpm/redhat/[email protected]?arch=x86_64&distro=rhel-8.10&upstream=acl-2.2.53-3.el8.src.rpm",
        "RHEL 8 Package: acl",
    ),
    (
        "pkg:rpm/sles/[email protected]%2Bgit20180409.04c9dae-150300.10.28.2?arch=x86_64&distro=sles-15.6",
        "SUSE Linux Enterprise Server 15 SP6 Package: aaa_base",
    ),
    ("pkg:rpm/amzn/[email protected]?arch=noarch&distro=amzn-2", "Amazon Linux 2 Package: basesystem"),
    ("pkg:rpm/amzn/[email protected]?arch=x86_64&distro=amzn-2018.03", "Amazon Linux Package: bzip2-libs"),
    (
        "pkg:rpm/amzn/[email protected]?arch=x86_64&distro=amzn-2023",
        "Amazon Linux 2023 Package: alternatives",
    ),
]

# (purl, expected component name, expected version) triples consumed by
# test_match_package_purls.
# The "<name>@<version>" part of every PURL had been replaced by an
# e-mail-obfuscation placeholder ("[email protected]"), which made the PURLs
# parse to the wrong name. Each one is rebuilt here from the expected name and
# version columns of its own row.
package_purls = [
    ("pkg:npm/%40angular/animations@19.2.2", "Node.js Package: @angular/animations", "19.2.2"),
    (
        # NOTE(review): the leading "v" is assumed (usual Go module version
        # form; the matcher strips it) — confirm against the upstream test file.
        "pkg:golang/github.com/Azure/azure-sdk-for-go/sdk/azcore@v1.18.0?type=module",
        "Go Package: github.com/Azure/azure-sdk-for-go/sdk/azcore",
        "1.18.0",
    ),
    ("pkg:gem/actionpack@7.2.2.1", "RubyGem: actionpack", "7.2.2.1"),
    ("pkg:composer/composer/pcre@3.3.1", "PHP Package: composer/pcre", "3.3.1"),
]

# PURLs for which test_match_unknown_purl expects match_purl to return
# (None, None).
# NOTE(review): the "[email protected]" fragments in the first two entries look like
# an e-mail-obfuscation artifact that replaced the original "<name>@<version>"
# text — restore them from the upstream repository.
unknown_purls = [
    "pkg:conan/[email protected]",
    "pkg:deb/[email protected]",
    "pkg:android/com.android.dialer@35",
    "pkg:github/package-url/purl-spec@244fd47e07d1004f0aed9c",
]


@pytest.mark.parametrize(("purl", "expected"), distro_purls)
def test_match_distro_purl(purl: str, expected: str):
    """Distribution PURLs resolve to the expected name and to "All Versions"."""
    parsed = PackageURL.from_string(purl)
    matched_name, matched_version = match_purl(parsed)
    assert (matched_name, matched_version) == (expected, "All Versions")


@pytest.mark.parametrize(("purl", "expected_name", "expected_version"), package_purls)
def test_match_package_purls(purl: str, expected_name: str, expected_version: str):
    """Language-ecosystem PURLs resolve to the expected name and version."""
    parsed = PackageURL.from_string(purl)
    matched_name, matched_version = match_purl(parsed)
    assert (matched_name, matched_version) == (expected_name, expected_version)


@pytest.mark.parametrize("purl", unknown_purls)
def test_match_unknown_purl(purl: str):
    """PURLs without a known mapping yield neither a name nor a version."""
    parsed = PackageURL.from_string(purl)
    matched_name, matched_version = match_purl(parsed)
    assert matched_name is None and matched_version is None
43 changes: 4 additions & 39 deletions vilocify/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from cyclonedx.model.bom import Component as BomComponent

from vilocify.http import JSONAPIRequestError, RequestError
from vilocify.match import MissingPurlError, match_bom_component
from vilocify.models import (
Component,
ComponentRequest,
Expand All @@ -26,18 +27,13 @@

logger = logging.getLogger(__name__)


# Version banner template; "%(version)s" is a %-style named placeholder.
# NOTE(review): presumably substituted by the CLI's version option — the
# consumer is outside this view, confirm there.
version_text = """Vilocify Python SDK, version %(version)s

Copyright (C) 2025 Siemens AG
MIT License
"""


class MissingPurlError(Exception):
    """Signals that an SBOM component being imported carries no PURL."""


class BadCycloneDXFileError(Exception):
    """Signals a CycloneDX file whose extension is not supported."""

Expand Down Expand Up @@ -72,44 +68,13 @@ def notifications(monitoring_list: str, since: datetime):
print(f"No new notifications for monitoringlist #{monitoring_list} since {since.isoformat()}.")


def _vilocify_matcher_for_bom_component(bom_component: BomComponent) -> tuple[str | None, str | None]:
    """Derive a Vilocify (name, version) pair from a BOM component's PURL.

    Language-ecosystem PURLs yield "<prefix>: [<namespace>/]<name>" together
    with the component's version stripped of leading "v" characters. Fedora,
    Debian and Ubuntu packages yield "<distro> Package: <name>" with the
    version "All Versions". Any other PURL yields ``(None, None)``.

    Raises:
        MissingPurlError: if the component has no PURL.
    """
    purl = bom_component.purl
    if purl is None:
        raise MissingPurlError(f"purl is missing for BOM component {bom_component}")

    ecosystem_prefixes = {
        "cargo": "Rust Crate",
        "gem": "RubyGem",
        "golang": "Go Package",
        "npm": "Node.js Package",
        "nuget": "NuGet Package",
        "pypi": "Python Package",
    }
    ecosystem_prefix = ecosystem_prefixes.get(purl.type)
    if ecosystem_prefix is not None:
        version = bom_component.version
        if version is not None:
            version = version.lstrip("v")
        qualified_name = f"{purl.namespace}/{purl.name}" if purl.namespace else f"{purl.name}"
        return f"{ecosystem_prefix}: {qualified_name}", version

    # Distribution packages are keyed by (purl type, namespace).
    distro_prefixes = {
        ("rpm", "fedora"): "Fedora Package",
        ("deb", "debian"): "Debian Package",
        ("deb", "ubuntu"): "Ubuntu Package",
    }
    distro_prefix = distro_prefixes.get((purl.type, purl.namespace))
    if distro_prefix is None:
        return None, None
    return f"{distro_prefix}: {purl.name}", "All Versions"


def _from_component_request(bom_component: BomComponent) -> Component | None:
cr = ComponentRequest.where("componentUrl", "eq", str(bom_component.purl)).first()
if cr is None:
name, _ = _vilocify_matcher_for_bom_component(bom_component)
name, version = match_bom_component(bom_component)
cr = ComponentRequest(
name=name or bom_component.name,
version=bom_component.version,
version=version or bom_component.version,
component_url=str(bom_component.purl),
comment="Auto-created by vilocify-sdk-python",
)
Expand All @@ -119,7 +84,7 @@ def _from_component_request(bom_component: BomComponent) -> Component | None:


def _find_vilocify_component(bom_component: BomComponent) -> Component | None:
vilocify_name, vilocify_version = _vilocify_matcher_for_bom_component(bom_component)
vilocify_name, vilocify_version = match_bom_component(bom_component)
if vilocify_name is not None and vilocify_version is not None:
component = (
Component.where("name", "eq", vilocify_name)
Expand Down
156 changes: 156 additions & 0 deletions vilocify/match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""Matches PURLs to vilocify component names and versions.

Vilocify currently does not support identifying components by PURLs directly. However, component naming in Vilocify
follows certain (undocumented) naming conventions. This module provides functions to map PURL information to names and
versions that can be used to identify components in Vilocify's component database.
"""

# SPDX-FileCopyrightText: 2025 Siemens AG
# SPDX-License-Identifier: MIT

from cyclonedx.model.bom import Component as BomComponent
from packageurl import PackageURL

# Result of a match: a (component name, version) pair. The matchers in this
# module return (None, None) for PURLs they do not recognise.
Matcher = tuple[str | None, str | None]

# Maps a PURL type to the component-name prefix used for that ecosystem; the
# resulting name has the form "<prefix>: [<namespace>/]<name>".
PURL_TYPES: dict[str, str] = {
    "cargo": "Rust Crate",
    "composer": "PHP Package",
    "cpan": "Perl Module",
    "gem": "RubyGem",
    "golang": "Go Package",
    "hackage": "Haskell Package",
    "npm": "Node.js Package",
    "nuget": "NuGet Package",
    "pub": "Dart Package",
    "pypi": "Python Package",
    "swift": "Swift Package",
}

# Lookup table for distribution packages, nested as
#   PURL type -> PURL namespace -> `distro` qualifier prefix -> name prefix.
# The innermost keys are compared as prefixes of the lower-cased `distro`
# qualifier (so "rhel-8" also covers "rhel-8.10"); the None key holds the
# fallback used when the qualifier is absent or matches no other key.
PURL_DISTROS: dict[str, dict[str, dict[str | None, str]]] = {
    "alpm": {"arch": {None: "Arch Linux Package"}},
    "apk": {
        "alpine": {
            None: "Alpine Package",
            "alpine-3.18": "Alpine 3.18 Package",
            "alpine-3.19": "Alpine 3.19 Package",
            "alpine-3.20": "Alpine 3.20 Package",
            "alpine-3.21": "Alpine 3.21 Package",
        },
        "openwrt": {
            None: "OpenWrt Package",
        },
    },
    "deb": {
        "debian": {
            None: "Debian Package",
            "debian-11": "Debian 11 Package",
            "bullseye": "Debian 11 Package",
            "debian-12": "Debian 12 Package",
            "bookworm": "Debian 12 Package",
        },
        "ubuntu": {
            None: "Ubuntu Package",
            "ubuntu-20.04": "Ubuntu 20.04 Package",
            "ubuntu-22.04": "Ubuntu 22.04 Package",
            "ubuntu-24.04": "Ubuntu 24.04 Package",
        },
    },
    "rpm": {
        "almalinux": {
            None: "AlmaLinux Package",
            "almalinux-8": "AlmaLinux 8 Package",
            "almalinux-9": "AlmaLinux 9 Package",
            "almalinux-10": "AlmaLinux 10 Package",
        },
        "amzn": {
            None: "Amazon Linux Package",
            # NOTE(review): "amzn-2" is itself a prefix of "amzn-2018" and
            # "amzn-2023"; keep it after the longer keys so they are preferred.
            "amzn-2018": "Amazon Linux Package",
            "amzn-2023": "Amazon Linux 2023 Package",
            "amzn-2": "Amazon Linux 2 Package",
        },
        "fedora": {
            None: "Fedora Package",
            "fedora-40": "Fedora 40 Package",
            "fedora-41": "Fedora 41 Package",
            "fedora-42": "Fedora 42 Package",
        },
        "opensuse": {
            None: "openSUSE Package",
        },
        "ol": {
            None: "Oracle Linux OS Package",
            "ol-7": "Oracle Linux OS 7 Package",
            "ol-8": "Oracle Linux OS 8 Package",
            "ol-9": "Oracle Linux OS 9 Package",
        },
        "redhat": {
            None: "RHEL Package",
            "rhel-7": "RHEL 7 Package",
            "rhel-8": "RHEL 8 Package",
            "rhel-9": "RHEL 9 Package",
        },
        "rocky": {
            None: "Rocky Linux Package",
            "rocky-8": "Rocky Linux 8 Package",
            "rocky-9": "Rocky Linux 9 Package",
        },
        "sles": {
            None: "SUSE Linux Enterprise Server Package",
            "sles-15.5": "SUSE Linux Enterprise Server 15 SP5 Package",
            "sles-15.6": "SUSE Linux Enterprise Server 15 SP6 Package",
            "sles-15.7": "SUSE Linux Enterprise Server 15 SP7 Package",
        },
    },
}


class MissingPurlError(Exception):
    """Signals that an SBOM component being imported carries no PURL."""


def _match_purl_distro(purl: PackageURL) -> Matcher:
    """Map a distribution-package PURL to a Vilocify component name.

    The PURL type and namespace (both compared lower-cased) select a table of
    name prefixes from ``PURL_DISTROS``. Within that table the lower-cased
    ``distro`` qualifier picks the release-specific prefix; without a matching
    qualifier the table's ``None`` entry is used.

    Args:
        purl: The package URL to match.

    Returns:
        ``("<prefix>: <name>", "All Versions")`` for a known type/namespace
        combination, otherwise ``(None, None)``.
    """
    distro_type = PURL_DISTROS.get(purl.type.lower())
    if distro_type is None or purl.namespace is None:
        return None, None

    distro_namespace = distro_type.get(purl.namespace.lower())
    if distro_namespace is None:
        return None, None

    distro = None
    if isinstance(purl.qualifiers, dict):
        distro = purl.qualifiers.get("distro")
    # A qualifier that is not a string cannot be prefix-matched; treat it as absent.
    distro = distro.lower() if isinstance(distro, str) else None

    # Prefer the longest matching key so the outcome does not depend on the
    # table's insertion order (e.g. "amzn-2" is a prefix of "amzn-2023").
    # With no match, best_key stays None and selects the fallback entry.
    best_key = max(
        (key for key in distro_namespace if key is not None and distro is not None and distro.startswith(key)),
        key=len,
        default=None,
    )
    return f"{distro_namespace[best_key]}: {purl.name}", "All Versions"


def _match_purl_type(purl: PackageURL) -> Matcher:
    """Build the Vilocify name and version for a language-ecosystem PURL.

    The name is "<prefix>: [<namespace>/]<name>" with the prefix taken from
    ``PURL_TYPES``; the version is the PURL version with leading "v"
    characters stripped, or ``None`` when the PURL has no version.
    """
    version = None if purl.version is None else purl.version.lstrip("v")
    prefix = PURL_TYPES[purl.type]
    qualified_name = f"{purl.namespace}/{purl.name}" if purl.namespace else f"{purl.name}"
    return f"{prefix}: {qualified_name}", version


def match_purl(purl: PackageURL) -> Matcher:
    """Match a PURL to a Vilocify component name and version.

    PURL types listed in ``PURL_TYPES`` are handled as language-ecosystem
    packages; every other type is tried as a distribution package.
    """
    matcher = _match_purl_type if purl.type in PURL_TYPES else _match_purl_distro
    return matcher(purl)


def match_bom_component(bom_component: BomComponent) -> Matcher:
    """Match a BOM component to a Vilocify component via its PURL.

    Raises:
        MissingPurlError: if the component has no PURL.
    """
    purl = bom_component.purl
    if purl is not None:
        return match_purl(purl)

    raise MissingPurlError(f"purl is missing for BOM component {bom_component}")