Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add compression_type option for publish #3367

Merged
merged 2 commits into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/3316.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added a ``compression_type`` option to allow publishing metadata files with zstd compression (in addition to the default gzip).
11 changes: 11 additions & 0 deletions pulp_rpm/app/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
from types import SimpleNamespace

# Compression formats that can be selected for published metadata files.
COMPRESSION_TYPES = SimpleNamespace(ZSTD="zstd", GZ="gz")

# ``choices`` pairs for model/serializer fields; each value doubles as its own label.
COMPRESSION_CHOICES = tuple(
    (compression, compression)
    for compression in (COMPRESSION_TYPES.ZSTD, COMPRESSION_TYPES.GZ)
)

CHECKSUM_TYPES = SimpleNamespace(
UNKNOWN="unknown",
MD5="md5",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 4.2.7 on 2023-12-12 18:37

from django.db import migrations, models


class Migration(migrations.Migration):
# Schema migration for the ``rpm`` app: adds a ``compression_type`` text column
# to both the ``rpmpublication`` and ``rpmrepository`` models.

# Must be applied after migration 0058 of the ``rpm`` app.
dependencies = [
('rpm', '0058_alter_addon_repository_alter_variant_repository'),
]

operations = [
# Both fields are declared with null=True and accept only 'zstd' or 'gz'.
migrations.AddField(
model_name='rpmpublication',
name='compression_type',
field=models.TextField(choices=[('zstd', 'zstd'), ('gz', 'gz')], null=True),
),
migrations.AddField(
model_name='rpmrepository',
name='compression_type',
field=models.TextField(choices=[('zstd', 'zstd'), ('gz', 'gz')], null=True),
),
]
7 changes: 6 additions & 1 deletion pulp_rpm/app/models/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
validate_version_paths,
)

from pulp_rpm.app.constants import CHECKSUM_CHOICES
from pulp_rpm.app.constants import CHECKSUM_CHOICES, COMPRESSION_CHOICES
from pulp_rpm.app.models import (
DistributionTree,
Package,
Expand Down Expand Up @@ -199,6 +199,8 @@ class RpmRepository(Repository, AutoAddObjPermsMixin):
package_checksum_type (String):
The name of a default checksum type to use for packages when generating metadata.
repo_config (JSON): repo configuration that will be served by distribution
compression_type(pulp_rpm.app.constants.COMPRESSION_TYPES):
Compression type to use for metadata files.
"""

TYPE = "rpm"
Expand Down Expand Up @@ -226,6 +228,7 @@ class RpmRepository(Repository, AutoAddObjPermsMixin):

autopublish = models.BooleanField(default=False)
checksum_type = models.TextField(null=True, choices=CHECKSUM_CHOICES)
compression_type = models.TextField(null=True, choices=COMPRESSION_CHOICES)
metadata_checksum_type = models.TextField(null=True, choices=CHECKSUM_CHOICES)
package_checksum_type = models.TextField(null=True, choices=CHECKSUM_CHOICES)
repo_config = models.JSONField(default=dict)
Expand All @@ -252,6 +255,7 @@ def on_new_version(self, version):
"package": self.package_checksum_type,
},
repo_config=self.repo_config,
compression_type=self.compression_type,
)

@staticmethod
Expand Down Expand Up @@ -416,6 +420,7 @@ class RpmPublication(Publication, AutoAddObjPermsMixin):

TYPE = "rpm"
checksum_type = models.TextField(choices=CHECKSUM_CHOICES)
compression_type = models.TextField(null=True, choices=COMPRESSION_CHOICES)
metadata_checksum_type = models.TextField(choices=CHECKSUM_CHOICES)
package_checksum_type = models.TextField(choices=CHECKSUM_CHOICES)
repo_config = models.JSONField(default=dict)
Expand Down
14 changes: 14 additions & 0 deletions pulp_rpm/app/serializers/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
CHECKSUM_CHOICES,
SKIP_TYPES,
SYNC_POLICY_CHOICES,
COMPRESSION_CHOICES,
)
from pulp_rpm.app.models import (
RpmDistribution,
Expand Down Expand Up @@ -89,6 +90,12 @@ class RpmRepositorySerializer(RepositorySerializer):
required=False,
allow_null=True,
)
compression_type = serializers.ChoiceField(
help_text=_("The compression type to use for metadata files."),
choices=COMPRESSION_CHOICES,
required=False,
allow_null=True,
)
gpgcheck = serializers.IntegerField(
max_value=1,
min_value=0,
Expand Down Expand Up @@ -203,6 +210,7 @@ class Meta:
"repo_gpgcheck",
"sqlite_metadata",
"repo_config",
"compression_type",
)
model = RpmRepository

Expand Down Expand Up @@ -297,6 +305,11 @@ class RpmPublicationSerializer(PublicationSerializer):
choices=CHECKSUM_CHOICES,
required=False,
)
compression_type = serializers.ChoiceField(
help_text=_("The compression type to use for metadata files."),
choices=COMPRESSION_CHOICES,
required=False,
)
gpgcheck = serializers.IntegerField(
max_value=1,
min_value=0,
Expand Down Expand Up @@ -377,6 +390,7 @@ class Meta:
"repo_gpgcheck",
"sqlite_metadata",
"repo_config",
"compression_type",
)
model = RpmPublication

Expand Down
46 changes: 30 additions & 16 deletions pulp_rpm/app/tasks/publishing.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,32 @@
from collections import defaultdict
from gettext import gettext as _
import os
import logging
import os
import shutil
import tempfile
from collections import defaultdict
from gettext import gettext as _

import createrepo_c as cr
import libcomps

from django.conf import settings
from django.core.files import File
from django.db.models import Q

from pulpcore.plugin.models import (
AsciiArmoredDetachedSigningService,
ContentArtifact,
RepositoryVersion,
ProgressReport,
PublishedArtifact,
PublishedMetadata,
RepositoryContent,
RepositoryVersion,
)

from pulp_rpm.app.comps import dict_to_strdict
from pulp_rpm.app.constants import ALLOWED_CHECKSUM_ERROR_MSG, CHECKSUM_TYPES, PACKAGES_DIRECTORY
from pulp_rpm.app.constants import (
ALLOWED_CHECKSUM_ERROR_MSG,
CHECKSUM_TYPES,
COMPRESSION_TYPES,
PACKAGES_DIRECTORY,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you mind alphabetizing the imports?

)
from pulp_rpm.app.kickstart.treeinfo import PulpTreeInfo, TreeinfoData
from pulp_rpm.app.models import (
DistributionTree,
Expand Down Expand Up @@ -325,6 +328,7 @@ def publish(
metadata_signing_service=None,
checksum_types=None,
repo_config=None,
compression_type=COMPRESSION_TYPES.GZ,
):
"""
Create a Publication based on a RepositoryVersion.
Expand All @@ -335,6 +339,8 @@ def publish(
A reference to an associated signing service.
checksum_types (dict): Checksum types for metadata and packages.
repo_config (JSON): repo config that will be served by distribution
compression_type(pulp_rpm.app.constants.COMPRESSION_TYPES):
Compression type to use for metadata files.

"""
repository_version = RepositoryVersion.objects.get(pk=repository_version_pk)
Expand All @@ -360,7 +366,7 @@ def publish(
publication.checksum_type = checksum_type
publication.metadata_checksum_type = checksum_type
publication.package_checksum_type = checksum_types.get("package") or checksum_type

publication.compression_type = compression_type
publication.repo_config = repo_config

publication_data = PublicationData(publication)
Expand All @@ -382,6 +388,7 @@ def publish(
checksum_types,
publication_data.repomdrecords,
metadata_signing_service=metadata_signing_service,
compression_type=compression_type,
)
publish_pb.increment()

Expand All @@ -397,6 +404,7 @@ def publish(
extra_repomdrecords,
name,
metadata_signing_service=metadata_signing_service,
compression_type=compression_type,
)
publish_pb.increment()

Expand All @@ -412,6 +420,7 @@ def generate_repo_metadata(
extra_repomdrecords,
sub_folder=None,
metadata_signing_service=None,
compression_type=COMPRESSION_TYPES.GZ,
):
"""
Creates a repomd.xml file.
Expand All @@ -423,6 +432,8 @@ def generate_repo_metadata(
sub_folder(str): name of the folder for sub repos
metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
A reference to an associated signing service.
compression_type(pulp_rpm.app.constants.COMPRESSION_TYPES):
Compression type to use for metadata files.

"""
cwd = os.getcwd()
Expand All @@ -442,17 +453,20 @@ def generate_repo_metadata(
)

# Prepare metadata files
compression_extension = ".zst" if compression_type == COMPRESSION_TYPES.ZSTD else ".gz"
cr_compression_type = cr.ZSTD if compression_type == COMPRESSION_TYPES.ZSTD else cr.GZ

repomd_path = os.path.join(cwd, "repomd.xml")
pri_xml_path = os.path.join(cwd, "primary.xml.gz")
fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
oth_xml_path = os.path.join(cwd, "other.xml.gz")
upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
pri_xml_path = os.path.join(cwd, "primary.xml") + compression_extension
fil_xml_path = os.path.join(cwd, "filelists.xml") + compression_extension
oth_xml_path = os.path.join(cwd, "other.xml") + compression_extension
upd_xml_path = os.path.join(cwd, "updateinfo.xml") + compression_extension
mod_yml_path = os.path.join(cwd, "modules.yaml")
comps_xml_path = os.path.join(cwd, "comps.xml")

pri_xml = cr.PrimaryXmlFile(pri_xml_path, compressiontype=cr.GZ)
fil_xml = cr.FilelistsXmlFile(fil_xml_path, compressiontype=cr.GZ)
oth_xml = cr.OtherXmlFile(oth_xml_path, compressiontype=cr.GZ)
pri_xml = cr.PrimaryXmlFile(pri_xml_path, compressiontype=cr_compression_type)
fil_xml = cr.FilelistsXmlFile(fil_xml_path, compressiontype=cr_compression_type)
oth_xml = cr.OtherXmlFile(oth_xml_path, compressiontype=cr_compression_type)
upd_xml = None

# We want to support publishing with a different checksum type than the one built-in to the
Expand Down Expand Up @@ -567,7 +581,7 @@ def generate_repo_metadata(
update_records = UpdateRecord.objects.filter(pk__in=content).order_by("id", "digest")
for update_record in update_records.iterator():
if not upd_xml:
upd_xml = cr.UpdateInfoXmlFile(upd_xml_path, compressiontype=cr.GZ)
upd_xml = cr.UpdateInfoXmlFile(upd_xml_path, compressiontype=cr_compression_type)
upd_xml.add_chunk(cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

# Process modulemd, modulemd_defaults and obsoletes
Expand Down
2 changes: 2 additions & 0 deletions pulp_rpm/app/viewsets/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ def create(self, request):
)
repo_config = serializer.validated_data.get("repo_config", repository.repo_config)
repo_config = gpgcheck_options if gpgcheck_options else repo_config
compression_type = serializer.validated_data.get("compression_type")

if repository.metadata_signing_service:
signing_service_pk = repository.metadata_signing_service.pk
Expand All @@ -578,6 +579,7 @@ def create(self, request):
"metadata_signing_service": signing_service_pk,
"checksum_types": checksum_types,
"repo_config": repo_config,
"compression_type": compression_type,
},
)
return OperationPostponedResponse(result, request)
Expand Down
67 changes: 53 additions & 14 deletions pulp_rpm/tests/functional/api/test_publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,35 @@ def test_publish_any_repo_version(
}
rpm_publication_api.create(body)

@pytest.mark.parametrize("compression_type,compression_ext", (("gz", ".gz"), ("zstd", ".zst")))
@pytest.mark.parallel
def test_publish_with_compression_types(
self,
compression_type,
compression_ext,
rpm_unsigned_repo_immediate,
rpm_publication_api,
gen_object_with_cleanup,
rpm_distribution_api,
monitor_task,
):
"""Publish with each compression type and verify the metadata file extensions."""
# 1. Publish and distribute
publish_data = RpmRpmPublication(
repository=rpm_unsigned_repo_immediate.pulp_href, compression_type=compression_type
)
publish_response = rpm_publication_api.create(publish_data)
created_resources = monitor_task(publish_response.task).created_resources
publication_href = created_resources[0]

body = gen_distribution(publication=publication_href)
distribution = gen_object_with_cleanup(rpm_distribution_api, body)

# 2. Check "primary", "filelists", "other", "updateinfo" have correct compression ext
# Any other metadata type listed in repomd.xml is skipped by the filter below.
for md_type, md_href in self.get_repomd_metadata_urls(distribution.base_url).items():
if md_type in ("primary", "filelists", "other", "updateinfo"):
assert md_href.endswith(compression_ext)

@pytest.mark.parallel
def test_validate_no_checksum_tag(
self,
Expand All @@ -95,11 +124,7 @@ def test_validate_no_checksum_tag(
distribution = gen_object_with_cleanup(rpm_distribution_api, body)

# 2. check the tag 'sum' is not present in updateinfo.xml
repomd = ElementTree.fromstring(
requests.get(os.path.join(distribution.base_url, "repodata/repomd.xml")).text
)

update_xml_url = self._get_updateinfo_xml_path(repomd)
update_xml_url = self.get_repomd_metadata_urls(distribution.base_url)["updateinfo"]
update_xml = download_and_decompress_file(
os.path.join(distribution.base_url, update_xml_url)
)
Expand All @@ -109,23 +134,37 @@ def test_validate_no_checksum_tag(
assert "sum" not in tags, update_info_content

@staticmethod
def _get_updateinfo_xml_path(root_elem):
"""Return the path to ``updateinfo.xml.gz``, relative to repository root.

Given a repomd.xml, this method parses the xml and returns the
location of updateinfo.xml.gz.
def get_repomd_metadata_urls(repomd_url: str):
"""
Helper function to get hrefs of repomd types.

Example:
```
>>> get_repomd_metadata_urls(distribution.base_url)
{
"primary": "repodata/.../primary.xml.gz",
"filelists": "repodata/.../filelists.xml.gz",
...
}
```
"""
# XML Reference:
# <ns0:repomd xmlns:ns0="http://linux.duke.edu/metadata/repo">
# <ns0:data type="primary">
# <ns0:checksum type="sha256">[…]</ns0:checksum>
# <ns0:location href="repodata/[…]-primary.xml.gz" />
# …
# </ns0:data>
# …
xpath = "{{{}}}data".format(RPM_NAMESPACES["metadata/repo"])
data_elems = [elem for elem in root_elem.findall(xpath) if elem.get("type") == "updateinfo"]
xpath = "{{{}}}location".format(RPM_NAMESPACES["metadata/repo"])
return data_elems[0].find(xpath).get("href")
repomd_xml = requests.get(os.path.join(repomd_url, "repodata/repomd.xml")).text
repomd = ElementTree.fromstring(repomd_xml)
xpath_data = "{{{}}}data".format(RPM_NAMESPACES["metadata/repo"])
xpath_location = "{{{}}}location".format(RPM_NAMESPACES["metadata/repo"])
hrefs = {}
for elem in repomd.findall(xpath_data):
md_type = elem.get("type")
hrefs[md_type] = elem.find(xpath_location).get("href")
return hrefs


@pytest.fixture(scope="class")
Expand Down
Loading