Skip to content

Commit 44f7e96

Browse files
authored
chore: improve testing of suffix indexing (#1080)
Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent dcc4af3 commit 44f7e96

File tree

2 files changed

+72
-35
lines changed

2 files changed

+72
-35
lines changed

src/macaron/repo_finder/commit_finder.py

Lines changed: 57 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, lis
367367
368368
Returns
369369
-------
370-
tuple[Pattern | None, list[str]]
370+
tuple[Pattern | None, list[str], CommitFinderInfo]
371371
The tuple of the regex pattern that will match the version, the list of version parts that were extracted, and
372372
the outcome to report. If an exception occurs from any regex operation, the pattern will be returned as None.
373373
@@ -384,43 +384,19 @@ def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, lis
384384
logger.debug("Version contained no valid parts: %s", version)
385385
return None, [], CommitFinderInfo.INVALID_VERSION
386386

387-
logger.debug("Final version parts: %s", parts)
387+
logger.debug("Version parts: %s", parts)
388+
389+
# Determine optional suffixes.
390+
optional_start_index = determine_optional_suffix_index(version, parts)
388391

389-
this_version_pattern = ""
390392
# Detect versions that end with a zero number (0, 00, 000, etc.), so that part can be made optional.
391393
has_trailing_zero = len(parts) > 2 and multiple_zero_pattern.match(parts[-1])
392394

393-
# Version parts that are alphanumeric, and do not come before parts that are purely numeric, can be treated
394-
# as optional suffixes.
395-
# E.g.
396-
# - 1.2.RELEASE -> 'RELEASE' becomes optional.
397-
# - 3.1.test.2.M5 -> 'M5' becomes optional.
398-
# Parts that come after a change in separator are also flagged as optional.
399-
# - 2.2-3 -> '3' becomes optional.
400-
optional_start_index = None
401-
separators = _split_separators(version)
402-
last_separator = separators[0] if separators else None
403-
for index in range(1, len(parts)):
404-
# Check if current part should be optional, or reset the index if not.
405-
optional_start_index = None if numeric_only_pattern.match(parts[index]) else index
406-
407-
if not last_separator:
408-
continue
409-
410-
if index >= len(separators):
411-
continue
412-
413-
# Check if parts should be made optional based on a difference in separators.
414-
new_separator = separators[index]
415-
if new_separator != last_separator:
416-
optional_start_index = index + 1
417-
break
418-
last_separator = new_separator
419-
420395
# Create the pattern.
396+
this_version_pattern = ""
421397
for count, part in enumerate(parts):
422-
# This part will be made optional in the regex if within the optional suffix range, or the final part and it
423-
# is a trailing zero.
398+
# This part will be made optional in the regex if it is within the optional suffix range, or is the final part
399+
# and is a trailing zero.
424400
optional = (optional_start_index and count >= optional_start_index) or (
425401
count == len(parts) - 1 and has_trailing_zero
426402
)
@@ -483,6 +459,54 @@ def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, lis
483459
return None, [], CommitFinderInfo.REGEX_COMPILE_FAILURE
484460

485461

462+
def determine_optional_suffix_index(version: str, parts: list[str]) -> int | None:
    """Find where the optional suffix of a version begins.

    Two rules decide which trailing parts may be treated as optional:

    - A part that is not purely numeric, and is not followed by a purely numeric part, is an optional suffix.
      - 1.2.RELEASE -> 'RELEASE' becomes optional.
      - 3.1.test.2.M5 -> 'M5' becomes optional.
    - Parts that follow a change of separator are optional.
      - 2.2-3 -> '3' becomes optional.

    Parameters
    ----------
    version: str
        The version string of the software component.
    parts: list[str]
        The non-separator parts of the version produced by a prior split operation.

    Returns
    -------
    int | None
        The zero-based index into ``parts`` of the first optional part, or None if no part is optional.
        The scan starts at the second part, so zero is never returned: at least one part is always mandatory.
    """
    separators = _split_separators(version)
    separator_count = len(separators)
    # The scan stops at the first separator that differs, so the first separator is the only reference needed.
    reference_separator = separators[0] if separators else None

    suffix_index = None
    for position, part in enumerate(parts[1:], start=1):
        # A purely numeric part discards any candidate found so far; any other part becomes the new candidate.
        suffix_index = None if numeric_only_pattern.match(part) else position

        # Separators can only be compared when a reference exists and this position has a separator of its own.
        if not reference_separator or position >= separator_count:
            continue

        if separators[position] != reference_separator:
            # The separator changed here, so everything after the current part is optional.
            suffix_index = position + 1
            break

    return suffix_index
508+
509+
486510
def match_tags(tag_list: list[str], name: str, version: str) -> tuple[list[str], CommitFinderInfo]:
487511
"""Return items of the passed tag list that match the passed artifact name and version.
488512
@@ -507,9 +531,8 @@ def match_tags(tag_list: list[str], name: str, version: str) -> tuple[list[str],
507531
# Generally version identifiers do not contain the `v` prefix, while tags often do. If a version does contain such
508532
# a prefix, it is expected to be in the tag also. If not, the `v` prefix is left as optional.
509533
v_prefix = "(?:v)?" if not version.lower().startswith("v") else ""
510-
escaped_version = re.escape(version)
511534
almost_exact_pattern = re.compile(
512-
f"^(?:[^/]+/)?(?P<prefix>{re.escape(name)}-)?{v_prefix}{escaped_version}$", re.IGNORECASE
535+
f"^(?:[^/]+/)?(?P<prefix>{re.escape(name)}-)?{v_prefix}{re.escape(version)}$", re.IGNORECASE
513536
)
514537

515538
# Compare tags to the almost exact pattern. Prefer tags that matched the name prefix as well.

tests/repo_finder/test_commit_finder.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from pydriller.git import Git
1717

1818
from macaron.repo_finder import commit_finder
19-
from macaron.repo_finder.commit_finder import AbstractPurlType
19+
from macaron.repo_finder.commit_finder import AbstractPurlType, determine_optional_suffix_index
2020
from macaron.repo_finder.repo_finder_enums import CommitFinderInfo
2121
from tests.slsa_analyzer.mock_git_utils import commit_files, initiate_repo
2222

@@ -246,6 +246,20 @@ def test_commit_finder_tag_no_commit(mocked_repo: Git) -> None:
246246
assert outcome == CommitFinderInfo.NO_TAGS_WITH_COMMITS
247247

248248

249+
@pytest.mark.parametrize(
    ("version", "parts", "expected"),
    [
        # A trailing non-numeric part is optional.
        pytest.param("1.2.RELEASE", ["1", "2", "RELEASE"], 2),
        # A non-numeric part followed by a numeric part is not optional; only the last one is.
        pytest.param("3.1.test.2.M5", ["3", "1", "test", "2", "M5"], 4),
        # Parts after a change of separator are optional.
        pytest.param("2.2-3", ["2", "2", "3"], 2),
        # The separator change takes effect even when numeric parts follow it.
        pytest.param("5.4.3_test.2.1", ["5", "4", "3", "test", "2", "1"], 3),
    ],
)
def test_commit_finder_optional_suffixes(version: str, parts: list, expected: int) -> None:
    """Check that the optional suffix index is computed as expected for each version."""
    actual = determine_optional_suffix_index(version, parts)
    assert actual == expected
261+
262+
249263
@given(text())
250264
@settings(max_examples=10000, deadline=None)
251265
def test_pattern_generation(version: str) -> None:

0 commit comments

Comments
 (0)