Skip to content

Commit d452a56

Browse files
committed
chore: remove path from check id; fixup comments and typing flags
Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent a937044 commit d452a56

File tree

5 files changed

+18
-17
lines changed

5 files changed

+18
-17
lines changed

src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import json
1212
import logging
1313
import os
14-
import subprocess # nosec
14+
import subprocess # nosec B404
1515
import tempfile
1616

1717
import yaml
@@ -120,7 +120,7 @@ def _load_defaults(self, resources_path: str) -> tuple[str, str | None, set[str]
120120

121121
semgrep_commands: list[str] = ["semgrep", "scan", "--validate", "--oss-only", "--config", custom_rule_path]
122122
try:
123-
process = subprocess.run(semgrep_commands, check=True, capture_output=True) # nosec
123+
process = subprocess.run(semgrep_commands, check=True, capture_output=True) # nosec B603
124124
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as semgrep_error:
125125
error_msg = (
126126
f"Unable to run semgrep validation on {custom_rule_path} with arguments "
@@ -185,8 +185,8 @@ def _extract_rule_ids(self, path: str, target_files: set[str]) -> set[str]:
185185
If any Semgrep rule file could not be safely loaded, or if their format was not in the expected Semgrep
186186
format, or if there were any files in 'target_files' not found when searching in 'path'.
187187
"""
188-
# We keep a record of any file paths we coulnd't find to provide a more useful error message, rather than raising
189-
# an error on the first missing file we see.
188+
# We keep a record of any file paths we couldn't find to provide a more useful error message, rather than
189+
# raising an error on the first missing file we see.
190190
missing_files: list[str] = []
191191
target_file_paths: list[str] = []
192192
rule_ids: set[str] = set()
@@ -211,7 +211,7 @@ def _extract_rule_ids(self, path: str, target_files: set[str]) -> set[str]:
211211
logger.debug(error_msg)
212212
raise ConfigurationError(error_msg) from yaml_error
213213

214-
# should be a top-level key "rules", and then a list of rules (dictionaries) with "id" entries
214+
# There should be a top-level key "rules", and then a list of rules (dictionaries) with "id" entries.
215215
try:
216216
for semgrep_rule in semgrep_ruleset["rules"]:
217217
rule_ids.add(semgrep_rule["id"])
@@ -243,11 +243,11 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
243243
if there is no source code available.
244244
"""
245245
analysis_result: dict = {}
246-
# since we have to run them anyway, return disabled rule findings for debug information
246+
# Since we have to run them anyway, return disabled rule findings for debug information.
247247
disabled_results: dict = {}
248248
# Here, we disable 'nosemgrep' ignoring so that this is not an evasion method of our scan (i.e. malware includes
249249
# 'nosemgrep' comments to prevent our scan detecting those code lines). Read more about the 'nosemgrep' feature
250-
# here: https://semgrep.dev/docs/ignoring-files-folders-code
250+
# here: https://semgrep.dev/docs/ignoring-files-folders-code.
251251
semgrep_commands: list[str] = ["semgrep", "scan", "--oss-only", "--disable-nosem"]
252252
result: HeuristicResult = HeuristicResult.PASS
253253

@@ -266,7 +266,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
266266
semgrep_commands.append(f"--json-output={output_json_file.name}")
267267
logger.debug("executing: %s.", semgrep_commands)
268268
try:
269-
process = subprocess.run(semgrep_commands, check=True, capture_output=True) # nosec
269+
process = subprocess.run(semgrep_commands, check=True, capture_output=True) # nosec B603
270270
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as semgrep_error:
271271
error_msg = (
272272
f"Unable to run semgrep on {source_code_path} with arguments {semgrep_commands}: {semgrep_error}"
@@ -298,6 +298,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
298298
file = json_extract(finding, ["path"], str)
299299
if not rule_id or not file:
300300
continue
301+
rule_id = rule_id.split(".")[-1]
301302

302303
file = os.path.relpath(file, os.path.dirname(source_code_path))
303304
start = json_extract(finding, ["start", "line"], int)
@@ -310,7 +311,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
310311
# final element in that path, so we use that to match our rule IDs.
311312
# e.g. Semgrep reports the ID src.macaron.resources.pypi_malware_rules.obfuscation_decode-and-execute, which comes from
312313
# the rule ID 'obfuscation_decode-and-execute' inside 'obfuscation.yaml'.
313-
if rule_id.split(".")[-1] in self.disabled_rule_ids:
314+
if rule_id in self.disabled_rule_ids:
314315
if rule_id not in disabled_results:
315316
disabled_results[rule_id] = {"message": message, "detections": []}
316317
disabled_results[rule_id]["detections"].append({"file": file, "start": start, "end": end})
@@ -320,7 +321,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
320321
analysis_result[rule_id] = {"message": message, "detections": []}
321322
analysis_result[rule_id]["detections"].append({"file": file, "start": start, "end": end})
322323

323-
# some semgrep rules were triggered, even after removing disabled ones
324+
# Some semgrep rules were triggered, even after removing disabled ones.
324325
if analysis_result:
325326
result = HeuristicResult.FAIL
326327

src/macaron/parsers/pomparser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module contains the parser for POM files."""
55
import logging
6-
from xml.etree.ElementTree import Element # nosec
6+
from xml.etree.ElementTree import Element # nosec B405
77

88
import defusedxml.ElementTree
99
from defusedxml.ElementTree import fromstring

src/macaron/repo_finder/repo_finder_java.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import logging
66
import re
77
import urllib.parse
8-
from xml.etree.ElementTree import Element # nosec
8+
from xml.etree.ElementTree import Element # nosec B405
99

1010
from packageurl import PackageURL
1111

tests/malware_analyzer/pypi/resources/sourcecode_samples/exfiltration/expected_results.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"enabled_sourcecode_rule_findings": {
3-
"src.macaron.resources.pypi_malware_rules.exfiltration_remote-exfiltration": {
3+
"exfiltration_remote-exfiltration": {
44
"message": "Detected exfiltration of sensitive data to a remote endpoint",
55
"detections": [
66
{

tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/expected_results.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"enabled_sourcecode_rule_findings": {
3-
"src.macaron.resources.pypi_malware_rules.obfuscation_decode-and-execute": {
3+
"obfuscation_decode-and-execute": {
44
"message": "Detected the flow of a decoded primitive value to a remote endpoint, process, code evaluation, or file write",
55
"detections": [
66
{
@@ -35,7 +35,7 @@
3535
}
3636
]
3737
},
38-
"src.macaron.resources.pypi_malware_rules.obfuscation_inline-imports": {
38+
"obfuscation_inline-imports": {
3939
"message": "Found an instance of a suspicious API in a hardcoded inline import",
4040
"detections": [
4141
{
@@ -105,7 +105,7 @@
105105
}
106106
]
107107
},
108-
"src.macaron.resources.pypi_malware_rules.obfuscation_obfuscation-tools": {
108+
"obfuscation_obfuscation-tools": {
109109
"message": "Found an indicator of the use of a python code obfuscation tool",
110110
"detections": [
111111
{

0 commit comments

Comments
 (0)