Skip to content

Commit 256fd0c

Browse files
authored
chore: store provenance asset info (#975)
Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent 44f7e96 commit 256fd0c

File tree

10 files changed

+1011
-776
lines changed

10 files changed

+1011
-776
lines changed

docs/source/assets/er-diagram.svg

Lines changed: 876 additions & 716 deletions
Loading

src/macaron/database/table_definitions.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -499,9 +499,6 @@ class Provenance(ORMBase):
499499
#: The release tag commit sha.
500500
release_commit_sha: Mapped[str] = mapped_column(String, nullable=True)
501501

502-
#: The release tag.
503-
release_tag: Mapped[str] = mapped_column(String, nullable=True)
504-
505502
#: The repository URL from the provenance.
506503
repository_url: Mapped[str] = mapped_column(String, nullable=True)
507504

@@ -511,6 +508,12 @@ class Provenance(ORMBase):
511508
#: The provenance payload.
512509
provenance_payload: Mapped[InTotoPayload] = mapped_column(ProvenancePayload, nullable=False)
513510

511+
#: The name of the provenance asset.
512+
provenance_asset_name: Mapped[str] = mapped_column(String, nullable=True)
513+
514+
#: The URL of the provenance asset.
515+
provenance_asset_url: Mapped[str] = mapped_column(String, nullable=True)
516+
514517
#: The verified status of the provenance.
515518
verified: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
516519

src/macaron/provenance/provenance_finder.py

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import logging
77
import os
88
import tempfile
9+
from dataclasses import dataclass
910
from functools import partial
1011

1112
from packageurl import PackageURL
@@ -30,6 +31,15 @@
3031
logger: logging.Logger = logging.getLogger(__name__)
3132

3233

34+
@dataclass(frozen=True)
35+
class ProvenanceAsset:
36+
"""This class exists to hold a provenance payload with the original asset's name and URL."""
37+
38+
payload: InTotoPayload
39+
name: str
40+
url: str
41+
42+
3343
class ProvenanceFinder:
3444
"""This class is used to find and retrieve provenance files from supported registries."""
3545

@@ -44,7 +54,7 @@ def __init__(self) -> None:
4454
elif isinstance(registry, JFrogMavenRegistry):
4555
self.jfrog_registry = registry
4656

47-
def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]:
57+
def find_provenance(self, purl: PackageURL) -> list[ProvenanceAsset]:
4858
"""Find the provenance file(s) of the passed PURL.
4959
5060
Parameters
@@ -54,8 +64,8 @@ def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]:
5464
5565
Returns
5666
-------
57-
list[InTotoPayload]
58-
The provenance payload, or an empty list if not found.
67+
list[ProvenanceAsset]
68+
The provenance asset(s), or an empty list if not found.
5969
"""
6070
logger.debug("Seeking provenance of: %s", purl)
6171

@@ -88,7 +98,7 @@ def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]:
8898
logger.debug("Provenance finding not supported for PURL type: %s", purl.type)
8999
return []
90100

91-
def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload]]]) -> list[InTotoPayload]:
101+
def _find_provenance(self, discovery_functions: list[partial[list[ProvenanceAsset]]]) -> list[ProvenanceAsset]:
92102
"""Find the provenance file(s) using the passed discovery functions.
93103
94104
Parameters
@@ -99,7 +109,7 @@ def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload]
99109
Returns
100110
-------
101111
list[ProvenanceAsset]
102-
The provenance payload(s) from the first successful function, or an empty list if none were.
112+
The provenance asset(s) from the first successful function, or an empty list if none were.
103113
"""
104114
if not discovery_functions:
105115
return []
@@ -114,7 +124,7 @@ def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload]
114124
return []
115125

116126

117-
def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoPayload]:
127+
def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[ProvenanceAsset]:
118128
"""Find and download the NPM based provenance for the passed PURL.
119129
120130
Two kinds of attestation can be retrieved from npm: "Provenance" and "Publish". The "Provenance" attestation
@@ -131,8 +141,8 @@ def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoP
131141
132142
Returns
133143
-------
134-
list[InTotoPayload]
135-
The provenance payload(s), or an empty list if not found.
144+
list[ProvenanceAsset]
145+
The provenance asset(s), or an empty list if not found.
136146
"""
137147
if not registry.enabled:
138148
logger.debug("The npm registry is not enabled.")
@@ -178,16 +188,19 @@ def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoP
178188
publish_payload = load_provenance_payload(signed_download_path)
179189
except LoadIntotoAttestationError as error:
180190
logger.error("Error while loading publish attestation: %s", error)
181-
return [provenance_payload]
191+
return [ProvenanceAsset(provenance_payload, npm_provenance_asset.name, npm_provenance_asset.url)]
182192

183-
return [provenance_payload, publish_payload]
193+
return [
194+
ProvenanceAsset(provenance_payload, npm_provenance_asset.name, npm_provenance_asset.url),
195+
ProvenanceAsset(publish_payload, npm_provenance_asset.name, npm_provenance_asset.url),
196+
]
184197

185198
except OSError as error:
186199
logger.error("Error while storing provenance in the temporary directory: %s", error)
187200
return []
188201

189202

190-
def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[InTotoPayload]:
203+
def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[ProvenanceAsset]:
191204
"""Find and download the GAV based provenance for the passed PURL.
192205
193206
Parameters
@@ -199,8 +212,8 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
199212
200213
Returns
201214
-------
202-
list[InTotoPayload] | None
203-
The provenance payload if found, or an empty list otherwise.
215+
list[ProvenanceAsset]
216+
The provenance asset if found, or an empty list otherwise.
204217
205218
Raises
206219
------
@@ -269,7 +282,7 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
269282
if not is_witness_provenance_payload(provenance_payload, witness_verifier_config.predicate_types):
270283
continue
271284

272-
provenances.append(provenance_payload)
285+
provenances.append(ProvenanceAsset(provenance_payload, provenance_asset.name, provenance_asset.url))
273286
except OSError as error:
274287
logger.error("Error while storing provenance in the temporary directory: %s", error)
275288

@@ -281,7 +294,7 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
281294
return provenances[:1]
282295

283296

284-
def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]:
297+
def find_pypi_provenance(purl: PackageURL) -> list[ProvenanceAsset]:
285298
"""Find and download the PyPI based provenance for the passed PURL.
286299
287300
Parameters
@@ -291,11 +304,11 @@ def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]:
291304
292305
Returns
293306
-------
294-
list[InTotoPayload] | None
295-
The provenance payload if found, or an empty list otherwise.
307+
list[ProvenanceAsset]
308+
The provenance assets found, or an empty list otherwise.
296309
"""
297-
attestation, verified = DepsDevRepoFinder.get_attestation(purl)
298-
if not attestation:
310+
attestation, url, verified = DepsDevRepoFinder.get_attestation(purl)
311+
if not (attestation and url):
299312
return []
300313

301314
with tempfile.TemporaryDirectory() as temp_dir:
@@ -306,15 +319,15 @@ def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]:
306319
try:
307320
payload = load_provenance_payload(file_name)
308321
payload.verified = verified
309-
return [payload]
322+
return [ProvenanceAsset(payload, purl.name, url)]
310323
except LoadIntotoAttestationError as load_error:
311324
logger.error("Error while loading provenance: %s", load_error)
312325
return []
313326

314327

315328
def find_provenance_from_ci(
316329
analyze_ctx: AnalyzeContext, git_obj: Git | None, download_path: str
317-
) -> InTotoPayload | None:
330+
) -> ProvenanceAsset | None:
318331
"""Try to find provenance from CI services of the repository.
319332
320333
Note that we stop going through the CI services once we encounter a CI service
@@ -409,7 +422,10 @@ def find_provenance_from_ci(
409422
download_provenances_from_ci_service(ci_info, download_path)
410423

411424
# TODO consider how to handle multiple payloads here.
412-
return ci_info["provenances"][0].payload if ci_info["provenances"] else None
425+
if ci_info["provenances"]:
426+
provenance = ci_info["provenances"][0]
427+
return ProvenanceAsset(provenance.payload, provenance.asset.name, provenance.asset.url)
428+
return None
413429

414430
else:
415431
logger.debug("CI service not supported for provenance finding: %s", ci_service.name)

src/macaron/provenance/provenance_verifier.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from macaron.config.defaults import defaults
1818
from macaron.config.global_config import global_config
1919
from macaron.provenance.provenance_extractor import ProvenancePredicate, SLSAGithubGenericBuildDefinitionV01
20+
from macaron.provenance.provenance_finder import ProvenanceAsset
2021
from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type
2122
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
2223
from macaron.slsa_analyzer.asset import AssetLocator
@@ -28,15 +29,15 @@
2829
logger: logging.Logger = logging.getLogger(__name__)
2930

3031

31-
def verify_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool:
32+
def verify_provenance(purl: PackageURL, provenance_assets: list[ProvenanceAsset]) -> bool:
3233
"""Verify the passed provenance.
3334
3435
Parameters
3536
----------
3637
purl: PackageURL
3738
The PURL of the analysis target.
38-
provenance: list[InTotoPayload]
39-
The list of provenance.
39+
provenance_assets: list[ProvenanceAsset]
40+
The list of provenance assets.
4041
4142
Returns
4243
-------
@@ -50,7 +51,7 @@ def verify_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool
5051
verification_function = None
5152

5253
if purl.type == "npm":
53-
verification_function = partial(verify_npm_provenance, purl, provenance)
54+
verification_function = partial(verify_npm_provenance, purl, provenance_assets)
5455

5556
# TODO other verification functions go here.
5657

@@ -61,31 +62,34 @@ def verify_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool
6162
return False
6263

6364

64-
def verify_npm_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool:
65+
def verify_npm_provenance(purl: PackageURL, provenance_assets: list[ProvenanceAsset]) -> bool:
6566
"""Compare the unsigned payload subject digest with the signed payload digest, if available.
6667
6768
Parameters
6869
----------
6970
purl: PackageURL
7071
The PURL of the analysis target.
71-
provenance: list[InTotoPayload]
72-
The provenances to verify.
72+
provenance_assets: list[ProvenanceAsset]
73+
The provenance assets to verify.
7374
7475
Returns
7576
-------
7677
bool
7778
True if the provenance was verified, or False otherwise.
7879
"""
79-
if len(provenance) != 2:
80-
logger.debug("Expected unsigned and signed provenance.")
80+
if len(provenance_assets) != 2:
81+
logger.debug("Expected unsigned and signed provenance assets.")
8182
return False
8283

83-
signed_subjects = provenance[1].statement.get("subject")
84+
signed_provenance = provenance_assets[1].payload
85+
unsigned_provenance = provenance_assets[0].payload
86+
87+
signed_subjects = signed_provenance.statement.get("subject")
8488
if not signed_subjects:
8589
logger.debug("Missing signed subjects.")
8690
return False
8791

88-
unsigned_subjects = provenance[0].statement.get("subject")
92+
unsigned_subjects = unsigned_provenance.statement.get("subject")
8993
if not unsigned_subjects:
9094
logger.debug("Missing unsigned subjects.")
9195
return False

src/macaron/repo_finder/repo_finder_deps_dev.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def get_latest_version(purl: PackageURL) -> tuple[PackageURL | None, RepoFinderI
164164
)
165165

166166
@staticmethod
167-
def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]:
167+
def get_attestation(purl: PackageURL) -> tuple[dict | None, str | None, bool]:
168168
"""Retrieve the attestation associated with the passed PURL.
169169
170170
Parameters
@@ -174,17 +174,18 @@ def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]:
174174
175175
Returns
176176
-------
177-
tuple[dict | None, bool]
178-
The attestation, or None if not found, and a flag for whether it is verified.
177+
tuple[dict | None, str | None, bool]
178+
The attestation and the URL of the attestation asset (each None if not found),
179+
and a flag for whether the attestation is verified.
179180
"""
180181
if purl.type != "pypi":
181182
logger.debug("PURL type (%s) attestation not yet supported via deps.dev.", purl.type)
182-
return None, False
183+
return None, None, False
183184

184185
if not purl.version:
185186
latest_purl, _ = DepsDevRepoFinder.get_latest_version(purl)
186187
if not latest_purl:
187-
return None, False
188+
return None, None, False
188189
purl = latest_purl
189190

190191
# Example of a PURL endpoint for deps.dev with '/' encoded as '%2F':
@@ -194,7 +195,7 @@ def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]:
194195

195196
result = send_get_http(target_url, headers={})
196197
if not result:
197-
return None, False
198+
return None, None, False
198199

199200
attestation_keys = ["attestations"]
200201
if "version" in result:
@@ -203,21 +204,22 @@ def get_attestation(purl: PackageURL) -> tuple[dict | None, bool]:
203204
result_attestations = json_extract(result, attestation_keys, list)
204205
if not result_attestations:
205206
logger.debug("No attestations in result.")
206-
return None, False
207+
return None, None, False
207208
if len(result_attestations) > 1:
208209
logger.debug("More than one attestation in result: %s", len(result_attestations))
209210

210211
attestation_url = json_extract(result_attestations, [0, "url"], str)
211212
if not attestation_url:
212213
logger.debug("No attestation reported for %s", purl)
213-
return None, False
214+
return None, None, False
214215

215216
attestation_data = send_get_http(attestation_url, headers={})
216217
if not attestation_data:
217-
return None, False
218+
return None, None, False
218219

219220
return (
220221
PyPIRegistry().extract_attestation(attestation_data),
222+
attestation_url,
221223
json_extract(result_attestations, [0, "verified"], bool) or False,
222224
)
223225

src/macaron/slsa_analyzer/analyzer.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -357,15 +357,17 @@ def run_single(
357357
package_registries_info = self._populate_package_registry_info()
358358

359359
provenance_is_verified = False
360+
provenance_asset = None
360361
if not provenance_payload and parsed_purl:
361362
# Try to find the provenance file for the parsed PURL.
362363
provenance_finder = ProvenanceFinder()
363364
provenances = provenance_finder.find_provenance(parsed_purl)
364365
if provenances:
365-
provenance_payload = provenances[0]
366+
provenance_asset = provenances[0]
367+
provenance_payload = provenance_asset.payload
366368
if provenance_payload.verified:
367369
provenance_is_verified = True
368-
elif verify_provenance:
370+
if verify_provenance:
369371
provenance_is_verified = provenance_verifier.verify_provenance(parsed_purl, provenances)
370372

371373
# Try to extract the repository URL and commit digest from the Provenance, if it exists.
@@ -490,10 +492,11 @@ def run_single(
490492
if not provenance_payload:
491493
# Look for provenance using the CI.
492494
with tempfile.TemporaryDirectory() as temp_dir:
493-
provenance_payload = find_provenance_from_ci(analyze_ctx, git_obj, temp_dir)
495+
provenance_asset = find_provenance_from_ci(analyze_ctx, git_obj, temp_dir)
494496
# If found, validate analysis target against new provenance.
495-
if provenance_payload:
497+
if provenance_asset:
496498
# If repository URL was not provided as input, check the one found during analysis.
499+
provenance_payload = provenance_asset.payload
497500
if not repo_path_input and component.repository:
498501
repo_path_input = component.repository.remote_path
499502
provenance_repo_url = provenance_commit_digest = None
@@ -538,7 +541,9 @@ def run_single(
538541
provenance_payload=provenance_payload,
539542
slsa_level=slsa_level,
540543
slsa_version=slsa_version,
541-
# TODO Add release tag, release digest.
544+
provenance_asset_name=provenance_asset.name if provenance_asset else None,
545+
provenance_asset_url=provenance_asset.url if provenance_asset else None,
546+
# TODO Add release digest.
542547
)
543548

544549
analyze_ctx.dynamic_data["validate_malware"] = validate_malware

0 commit comments

Comments
 (0)