Skip to content

Commit

Permalink
Use CVEorg keywords from ps-constants (#850)
Browse files Browse the repository at this point in the history
This PR changes the CVEorg collector to use keywords from the
`ps-constants` repository instead of the ones hardcoded in the `osidb`
repository. Also, it renames `blacklist` and `whitelist` to `blocklist`
and `allowlist`.

Closes OSIDB-3694
  • Loading branch information
jobselko authored Dec 11, 2024
2 parents 83ae2f4 + 375e86c commit 7bc59d2
Show file tree
Hide file tree
Showing 12 changed files with 507 additions and 1,384 deletions.
836 changes: 44 additions & 792 deletions collectors/cveorg/keywords.py

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions collectors/cveorg/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 4.2.16 on 2024-12-10 14:05

from django.db import migrations, models
import uuid


class Migration(migrations.Migration):
    """
    Initial migration of this app: creates the ``Keyword`` model with
    a UUID primary key, a unique keyword string and a type limited to
    the four allowlist/blocklist choices.
    """

    initial = True

    # first migration of the app, nothing to depend on
    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Keyword",
            fields=[
                (
                    "uuid",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    "keyword",
                    models.CharField(max_length=255, unique=True),
                ),
                (
                    "type",
                    models.CharField(
                        choices=[
                            ("ALLOWLIST", "Allowlist"),
                            ("ALLOWLIST_SPECIAL_CASE", "Allowlist Special Case"),
                            ("BLOCKLIST", "Blocklist"),
                            ("BLOCKLIST_SPECIAL_CASE", "Blocklist Special Case"),
                        ],
                        max_length=25,
                    ),
                ),
            ],
        ),
    ]
Empty file.
23 changes: 23 additions & 0 deletions collectors/cveorg/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import uuid

from django.db import models


class Keyword(models.Model):
    """
    A single keyword of a given type, as collected from
    `data/cveorg_keywords.yml` in the `ps-constants` repository.

    The CVEorg collector uses these keywords to determine
    whether it should create a flaw.
    """

    class Type(models.TextChoices):
        # the list a keyword belongs to; each list has a "special case" variant
        ALLOWLIST = "ALLOWLIST"
        ALLOWLIST_SPECIAL_CASE = "ALLOWLIST_SPECIAL_CASE"
        BLOCKLIST = "BLOCKLIST"
        BLOCKLIST_SPECIAL_CASE = "BLOCKLIST_SPECIAL_CASE"

    # internal primary key
    uuid = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)

    # keyword text, unique across all types
    keyword = models.CharField(unique=True, max_length=255)

    # which of the Type lists the keyword belongs to
    type = models.CharField(max_length=25, choices=Type.choices)
15 changes: 15 additions & 0 deletions collectors/cveorg/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from django.utils import timezone

from collectors.cveorg.collectors import CVEorgCollector
from collectors.cveorg.models import Keyword


@pytest.fixture(autouse=True)
Expand All @@ -16,6 +17,20 @@ def auto_enable_sync(enable_jira_task_sync, enable_bz_sync) -> None:
pass


@pytest.fixture()
def mock_keywords(monkeypatch) -> None:
    """
    Store a small set of testing keywords in the database to stand in
    for the ones that come from the ps-constants repository.
    """
    # (keyword, type) pairs, saved in exactly this order
    testing_keywords = (
        ("kernel", Keyword.Type.ALLOWLIST),
        (r"(?:\W|^)\.NET\b", Keyword.Type.ALLOWLIST_SPECIAL_CASE),
        (".*plugin.*for WordPress", Keyword.Type.BLOCKLIST),
        ("Cisco", Keyword.Type.BLOCKLIST),
        ("IBM Tivoli", Keyword.Type.BLOCKLIST),
        ("iTunes", Keyword.Type.BLOCKLIST),
        ("iOS", Keyword.Type.BLOCKLIST_SPECIAL_CASE),
    )
    for text, keyword_type in testing_keywords:
        Keyword(keyword=text, type=keyword_type).save()


@pytest.fixture()
def mock_repo(monkeypatch) -> None:
"""
Expand Down
8 changes: 4 additions & 4 deletions collectors/cveorg/tests/test_collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

class TestCVEorgCollector:
@pytest.mark.vcr
def test_collect_cveorg_records(self, mock_repo):
def test_collect_cveorg_records(self, mock_keywords, mock_repo):
"""
Test that snippets and flaws are created correctly.
"""
Expand All @@ -38,7 +38,7 @@ def test_collect_cveorg_records(self, mock_repo):
assert snippet2
assert snippet2.flaw == flaw2

def test_collect_cveorg_record_when_flaw_exists(self, mock_repo):
def test_collect_cveorg_record_when_flaw_exists(self, mock_keywords, mock_repo):
"""
Test that only a snippet is created when a flaw already exists.
"""
Expand All @@ -57,7 +57,7 @@ def test_collect_cveorg_record_when_flaw_exists(self, mock_repo):
snippet = Snippet.objects.first()
assert snippet.flaw == flaw

def test_ignored_cveorg_records(self, mock_repo):
def test_ignored_cveorg_records(self, mock_keywords, mock_repo):
"""
Test that snippets and flaws are not created when they do not comply with rules.
"""
Expand Down Expand Up @@ -157,7 +157,7 @@ def get_repo_changes(self):
assert Snippet.objects.all().count() == 0
assert Flaw.objects.all().count() == 0

def test_atomicity(self, monkeypatch, mock_repo):
def test_atomicity(self, monkeypatch, mock_keywords, mock_repo):
"""
Test that flaw and snippet are not created if any error occurs during the flaw creation.
"""
Expand Down
36 changes: 24 additions & 12 deletions collectors/cveorg/tests/test_keywords.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
import pytest

from collectors.cveorg.keywords import check_keywords, should_create_snippet
from collectors.cveorg.keywords import (
MissingKeywordsException,
check_keywords,
should_create_snippet,
)


@pytest.mark.parametrize(
"text, expected_output",
[
("Internet is a great thing!", ([], [])),
("IBM Tivoli is blue and red.", (["IBM Tivoli"], [])),
("we want to whitelist kernel", ([], ["kernel"])),
("we want to allowlist kernel", ([], ["kernel"])),
],
)
def test_check_keywords(text, expected_output):
def test_check_keywords(text, expected_output, mock_keywords):
assert check_keywords(text) == expected_output


Expand All @@ -22,7 +26,7 @@ def test_check_keywords(text, expected_output):
("new iOS is released", (["iOS"], [])),
],
)
def test_check_keywords_case_sensitive(text, expected_output):
def test_check_keywords_case_sensitive(text, expected_output, mock_keywords):
assert check_keywords(text) == expected_output


Expand All @@ -39,7 +43,7 @@ def test_check_keywords_case_sensitive(text, expected_output):
("new iOS is released", (["iOS"], [])),
],
)
def test_check_keywords_word_boundary(text, expected_output):
def test_check_keywords_word_boundary(text, expected_output, mock_keywords):
assert check_keywords(text) == expected_output


Expand All @@ -54,7 +58,7 @@ def test_check_keywords_word_boundary(text, expected_output):
("end of sentence .NET. new sentence", ([], [".NET"])),
],
)
def test_check_keywords_dotnet_special_case(text, expected_output):
def test_check_keywords_dotnet_special_case(text, expected_output, mock_keywords):
assert check_keywords(text) == expected_output


Expand All @@ -78,27 +82,35 @@ def test_check_keywords_dotnet_special_case(text, expected_output):
),
],
)
def test_check_keywords_wordpress(text, expected_output):
def test_check_keywords_wordpress(text, expected_output, mock_keywords):
assert check_keywords(text) == expected_output


@pytest.mark.parametrize(
"text, should_create",
[
# in both blacklist and whitelist
# in both blocklist and allowlist
("kernel and iOS in description", True),
# in whitelist only
# in allowlist only
("kernel and ios in description", True),
# not in whitelist or blacklist
# not in allowlist or blocklist
("something else in description", True),
# in blacklist only
# in blocklist only
("iOS in description", False),
# nothing to check
(None, False),
],
)
def test_should_create_snippet(text, should_create):
def test_should_create_snippet(text, should_create, mock_keywords):
"""
Check whether a snippet should be created based on keywords in `text`.
"""
assert should_create_snippet(text) == should_create


def test_missing_keywords():
    """
    Without the keyword fixture, deciding whether to create a snippet
    must fail with `MissingKeywordsException`.
    """
    description = "iOS in description"

    with pytest.raises(MissingKeywordsException):
        should_create_snippet(description)
26 changes: 26 additions & 0 deletions collectors/ps_constants/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from apps.sla.models import SLA, SLAPolicy
from apps.trackers.models import JiraBugIssuetype
from collectors.cveorg.models import Keyword
from osidb.models import SpecialConsiderationPackage

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -68,3 +69,28 @@ def sync_jira_bug_issuetype(source_dict):
JiraBugIssuetype.objects.all().delete()
for project in list(source_dict.values())[0]:
JiraBugIssuetype.objects.get_or_create(project=project)


@transaction.atomic
def sync_cveorg_keywords(source: dict) -> None:
    """
    Sync CVEorg keywords in the database

    All stored keywords are deleted and recreated from `source`, which must
    provide the sections `allowlist`, `allowlist_special_cases`, `blocklist`
    and `blocklist_special_cases`, each an iterable of keyword strings.

    The sections are read before anything is deleted, so a missing section
    leaves the stored keywords untouched. The function is decorated with
    `transaction.atomic` (presumably Django's), so a failure while
    recreating the keywords should roll the deletion back as well.

    :param source: parsed content of the CVEorg keywords file
    :raises KeyError: when any of the expected sections is missing; the
        original lookup error naming the section is attached as the cause
    """
    try:
        keywords = [
            (Keyword.Type.ALLOWLIST, source["allowlist"]),
            (Keyword.Type.ALLOWLIST_SPECIAL_CASE, source["allowlist_special_cases"]),
            (Keyword.Type.BLOCKLIST, source["blocklist"]),
            (Keyword.Type.BLOCKLIST_SPECIAL_CASE, source["blocklist_special_cases"]),
        ]
    except KeyError as exc:
        # chain explicitly so the traceback shows which section was missing
        raise KeyError(
            "The ps-constants repository does not contain the expected CVEorg keyword sections."
        ) from exc

    # Delete and recreate keywords
    Keyword.objects.all().delete()
    # a single bulk insert instead of one query per keyword
    Keyword.objects.bulk_create(
        [
            Keyword(keyword=entry, type=keyword_type)
            for keyword_type, data in keywords
            for entry in data
        ]
    )
10 changes: 10 additions & 0 deletions collectors/ps_constants/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .constants import PS_CONSTANTS_REPO_BRANCH, PS_CONSTANTS_REPO_URL
from .core import (
fetch_ps_constants,
sync_cveorg_keywords,
sync_jira_bug_issuetype,
sync_sla_policies,
sync_special_consideration_packages,
Expand Down Expand Up @@ -46,18 +47,25 @@ def collect_step_1_fetch():
logger.info(f"Fetching PS Constants (Jira Bug issuetype) from '{url}'")
jira_bug_issuetype = fetch_ps_constants(url)

url = f"{PS_CONSTANTS_BASE_URL}/cveorg_keywords.yml"
logger.info(f"Fetching CVEorg keywords from '{url}'")
cveorg_keywords = fetch_ps_constants(url)

return (
cveorg_keywords,
sc_packages,
sla_policies,
jira_bug_issuetype,
)


def collect_step_2_sync(
cveorg_keywords,
sc_packages,
sla_policies,
jira_bug_issuetype,
):
sync_cveorg_keywords(cveorg_keywords)
sync_special_consideration_packages(sc_packages)
sync_sla_policies(sla_policies)
sync_jira_bug_issuetype(jira_bug_issuetype)
Expand All @@ -83,6 +91,7 @@ def ps_constants_collector(collector_obj) -> str:
"""ps constants collector"""

(
cveorg_keywords,
sc_packages,
sla_policies,
jira_bug_issuetype,
Expand All @@ -96,6 +105,7 @@ def ps_constants_collector(collector_obj) -> str:
)

collect_step_2_sync(
cveorg_keywords,
sc_packages,
sla_policies,
jira_bug_issuetype,
Expand Down
Loading

0 comments on commit 7bc59d2

Please sign in to comment.