Skip to content

Commit

Permalink
fix: merge purls from matching records in lock-file (#965)
Browse files Browse the repository at this point in the history
  • Loading branch information
baszalmstra authored Dec 5, 2024
1 parent b90daf5 commit a8082a6
Show file tree
Hide file tree
Showing 8 changed files with 379 additions and 30 deletions.
126 changes: 120 additions & 6 deletions crates/rattler_lock/src/builder.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
//! Builder for the creation of lock files.
use std::{
borrow::Cow,
collections::{BTreeSet, HashMap},
sync::Arc,
};

use fxhash::FxHashMap;
use indexmap::{IndexMap, IndexSet};
use pep508_rs::ExtraName;
use rattler_conda_types::Platform;
use rattler_conda_types::{Platform, Version};

use crate::{
file_format_version::FileFormatVersion, Channel, CondaBinaryData, CondaPackageData,
Expand Down Expand Up @@ -118,11 +119,34 @@ pub struct LockFileBuilder {
environments: IndexMap<String, EnvironmentData>,

/// A list of all package metadata stored in the lock file.
conda_packages: IndexSet<CondaPackageData>,
conda_packages: IndexMap<UniqueCondaIdentifier, CondaPackageData>,
pypi_packages: IndexSet<PypiPackageData>,
pypi_runtime_configurations: IndexSet<HashablePypiPackageEnvironmentData>,
}

/// A unique identifier for a conda package. This is used to deduplicate
/// packages. This only includes the unique identifying aspects of a package.
#[derive(Debug, Hash, Eq, PartialEq)]
struct UniqueCondaIdentifier {
location: UrlOrPath,
normalized_name: String,
version: Version,
build: String,
subdir: String,
}

impl<'a> From<&'a CondaPackageData> for UniqueCondaIdentifier {
fn from(value: &'a CondaPackageData) -> Self {
Self {
location: value.location().clone(),
normalized_name: value.record().name.as_normalized().to_string(),
version: value.record().version.version().clone(),
build: value.record().build.clone(),
subdir: value.record().subdir.clone(),
}
}
}

impl LockFileBuilder {
/// Generate a new lock file using the builder pattern
pub fn new() -> Self {
Expand Down Expand Up @@ -184,15 +208,25 @@ impl LockFileBuilder {
indexes: None,
});

let unique_identifier = UniqueCondaIdentifier::from(&locked_package);

// Add the package to the list of packages.
let package_idx = self.conda_packages.insert_full(locked_package).0;
let entry = self.conda_packages.entry(unique_identifier);
let package_idx = entry.index();
entry
.and_modify(|pkg| {
if let Cow::Owned(merged_package) = pkg.merge(&locked_package) {
*pkg = merged_package;
}
})
.or_insert(locked_package);

// Add the package to the environment that it is intended for.
environment
.packages
.entry(platform)
.or_default()
.push(EnvironmentPackageData::Conda(package_idx));
.insert(EnvironmentPackageData::Conda(package_idx));

self
}
Expand Down Expand Up @@ -231,7 +265,7 @@ impl LockFileBuilder {
.packages
.entry(platform)
.or_default()
.push(EnvironmentPackageData::Pypi(package_idx, runtime_idx));
.insert(EnvironmentPackageData::Pypi(package_idx, runtime_idx));

self
}
Expand Down Expand Up @@ -327,7 +361,7 @@ impl LockFileBuilder {
LockFile {
inner: Arc::new(LockFileInner {
version: FileFormatVersion::LATEST,
conda_packages: self.conda_packages.into_iter().collect(),
conda_packages: self.conda_packages.into_values().collect(),
pypi_packages: self.pypi_packages.into_iter().collect(),
pypi_environment_package_data: self
.pypi_runtime_configurations
Expand Down Expand Up @@ -362,3 +396,83 @@ impl From<PypiPackageEnvironmentData> for HashablePypiPackageEnvironmentData {
}
}
}

#[cfg(test)]
mod test {
use std::str::FromStr;

use rattler_conda_types::{PackageName, PackageRecord, Platform, Version};
use url::Url;

use crate::{CondaBinaryData, LockFile};

#[test]
fn test_merge_records_and_purls() {
let record = PackageRecord {
subdir: "linux-64".into(),
..PackageRecord::new(
PackageName::new_unchecked("foobar"),
Version::from_str("1.0.0").unwrap(),
"build".into(),
)
};

let record_with_purls = PackageRecord {
purls: Some(
["pkg:pypi/[email protected]".parse().unwrap()]
.into_iter()
.collect(),
),
..record.clone()
};

let lock_file = LockFile::builder()
.with_conda_package(
"default",
Platform::Linux64,
CondaBinaryData {
package_record: record.clone(),
location: Url::parse(
"https://prefix.dev/example/linux-64/foobar-1.0.0-build.tar.bz2",
)
.unwrap()
.into(),
file_name: "foobar-1.0.0-build.tar.bz2".to_string(),
channel: None,
}
.into(),
)
.with_conda_package(
"default",
Platform::Linux64,
CondaBinaryData {
package_record: record.clone(),
location: Url::parse(
"https://prefix.dev/example/linux-64/foobar-1.0.0-build.tar.bz2",
)
.unwrap()
.into(),
file_name: "foobar-1.0.0-build.tar.bz2".to_string(),
channel: None,
}
.into(),
)
.with_conda_package(
"foobar",
Platform::Linux64,
CondaBinaryData {
package_record: record_with_purls,
location: Url::parse(
"https://prefix.dev/example/linux-64/foobar-1.0.0-build.tar.bz2",
)
.unwrap()
.into(),
file_name: "foobar-1.0.0-build.tar.bz2".to_string(),
channel: None,
}
.into(),
)
.finish();
insta::assert_snapshot!(lock_file.render_to_string().unwrap());
}
}
97 changes: 96 additions & 1 deletion crates/rattler_lock/src/conda.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{cmp::Ordering, hash::Hash};
use std::{borrow::Cow, cmp::Ordering, hash::Hash};

use rattler_conda_types::{
ChannelUrl, MatchSpec, Matches, NamelessMatchSpec, PackageRecord, RepoDataRecord,
Expand Down Expand Up @@ -78,6 +78,27 @@ impl CondaPackageData {
Self::Source(data) => Some(data),
}
}

/// Performs the best effort merge of two conda packages.
/// Some fields in the packages are optional, if one of the packages
/// contain an optional field they are merged.
pub(crate) fn merge(&self, other: &Self) -> Cow<'_, Self> {
match (self, other) {
(CondaPackageData::Binary(left), CondaPackageData::Binary(right)) => {
if let Cow::Owned(merged) = left.merge(right) {
return Cow::Owned(merged.into());
}
}
(CondaPackageData::Source(left), CondaPackageData::Source(right)) => {
if let Cow::Owned(merged) = left.merge(right) {
return Cow::Owned(merged.into());
}
}
_ => {}
}

Cow::Borrowed(self)
}
}

/// Information about a binary conda package stored in the lock-file.
Expand All @@ -102,6 +123,23 @@ impl From<CondaBinaryData> for CondaPackageData {
}
}

impl CondaBinaryData {
pub(crate) fn merge(&self, other: &Self) -> Cow<'_, Self> {
if self.location == other.location {
if let Cow::Owned(merged) =
merge_package_record(&self.package_record, &other.package_record)
{
return Cow::Owned(Self {
package_record: merged,
..self.clone()
});
}
}

Cow::Borrowed(self)
}
}

/// Information about a source package stored in the lock-file.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct CondaSourceData {
Expand All @@ -121,6 +159,23 @@ impl From<CondaSourceData> for CondaPackageData {
}
}

impl CondaSourceData {
pub(crate) fn merge(&self, other: &Self) -> Cow<'_, Self> {
if self.location == other.location {
if let Cow::Owned(merged) =
merge_package_record(&self.package_record, &other.package_record)
{
return Cow::Owned(Self {
package_record: merged,
..self.clone()
});
}
}

Cow::Borrowed(self)
}
}

/// A record of input files that were used to define the metadata of the
/// package.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
Expand Down Expand Up @@ -270,3 +325,43 @@ impl Matches<NamelessMatchSpec> for CondaPackageData {
spec.matches(self.record())
}
}

fn merge_package_record<'a>(
left: &'a PackageRecord,
right: &PackageRecord,
) -> Cow<'a, PackageRecord> {
let mut result = Cow::Borrowed(left);

// If the left package doesn't contain purls we merge those from the right one.
if left.purls.is_none() && right.purls.is_some() {
result = Cow::Owned(PackageRecord {
purls: right.purls.clone(),
..result.into_owned()
});
}

// If the left package doesn't contain run_exports we merge those from the right
// one.
if left.run_exports.is_none() && right.run_exports.is_some() {
result = Cow::Owned(PackageRecord {
run_exports: right.run_exports.clone(),
..result.into_owned()
});
}

// Merge hashes if the left package doesn't contain them.
if left.md5.is_none() && right.md5.is_some() {
result = Cow::Owned(PackageRecord {
md5: right.md5,
..result.into_owned()
});
}
if left.sha256.is_none() && right.sha256.is_some() {
result = Cow::Owned(PackageRecord {
sha256: right.sha256,
..result.into_owned()
});
}

result
}
6 changes: 4 additions & 2 deletions crates/rattler_lock/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
use std::{collections::HashMap, io::Read, path::Path, str::FromStr, sync::Arc};

use fxhash::FxHashMap;
use indexmap::IndexSet;
use rattler_conda_types::{Platform, RepoDataRecord};

mod builder;
Expand Down Expand Up @@ -136,7 +137,7 @@ struct LockFileInner {
/// enum and might contain additional data that is specific to the environment.
/// For instance different environments might select the same Pypi package but
/// with different extras.
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
enum EnvironmentPackageData {
Conda(usize),
Pypi(usize, usize),
Expand All @@ -158,7 +159,7 @@ struct EnvironmentData {

/// For each individual platform this environment supports we store the
/// package identifiers associated with the environment.
packages: FxHashMap<Platform, Vec<EnvironmentPackageData>>,
packages: FxHashMap<Platform, IndexSet<EnvironmentPackageData>>,
}

impl LockFile {
Expand Down Expand Up @@ -532,6 +533,7 @@ mod test {
#[case::v4_pypi_path("v4/path-based-lock.yml")]
#[case::v4_pypi_absolute_path("v4/absolute-path-lock.yml")]
#[case::v5_pypi_flat_index("v5/flat-index-lock.yml")]
#[case::v5_with_and_without_purl("v5/similar-with-and-without-purl.yml")]
#[case::v6_conda_source_path("v6/conda-path-lock.yml")]
#[case::v6_derived_channel("v6/derived-channel-lock.yml")]
fn test_parse(#[case] file_name: &str) {
Expand Down
Loading

0 comments on commit a8082a6

Please sign in to comment.