diff --git a/.prettierignore b/.prettierignore index d63dc2c0135..70ca255b4c9 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,5 @@ **/fixtures/** +**/testdata/** **/fixtures-go/** /docs/vendor/** /internal/output/html/*template.html diff --git a/cmd/osv-scanner/__snapshots__/main_test.snap b/cmd/osv-scanner/__snapshots__/main_test.snap index 53f3455d707..1d0d9c5178f 100755 --- a/cmd/osv-scanner/__snapshots__/main_test.snap +++ b/cmd/osv-scanner/__snapshots__/main_test.snap @@ -349,9 +349,9 @@ overriding license for package Packagist/league/flysystem/1.0.8 with 0BSD | LICENSE VIOLATION | ECOSYSTEM | PACKAGE | VERSION | SOURCE | +-------------------+-----------+------------------------------------------------+---------+-------------------------------------------------------+ | 0BSD | Packagist | league/flysystem | 1.0.8 | fixtures/locks-insecure/composer.lock | -| UNKNOWN | | https://github.com/flutter/buildroot.git | | fixtures/locks-insecure/osv-scanner-flutter-deps.json | -| UNKNOWN | | https://github.com/brendan-duncan/archive.git | | fixtures/locks-insecure/osv-scanner-flutter-deps.json | | UNKNOWN | | https://chromium.googlesource.com/chromium/src | | fixtures/locks-insecure/osv-scanner-flutter-deps.json | +| UNKNOWN | | https://github.com/brendan-duncan/archive.git | | fixtures/locks-insecure/osv-scanner-flutter-deps.json | +| UNKNOWN | | https://github.com/flutter/buildroot.git | | fixtures/locks-insecure/osv-scanner-flutter-deps.json | | UNKNOWN | RubyGems | ast | 2.4.2 | fixtures/locks-many/Gemfile.lock | | 0BSD | Packagist | sentry/sdk | 2.0.4 | fixtures/locks-many/composer.lock | +-------------------+-----------+------------------------------------------------+---------+-------------------------------------------------------+ @@ -908,6 +908,68 @@ Scanned /fixtures/call-analysis-go-project/go.mod file and found 4 pack --- +[TestRun_Docker/Fake_alpine_image - 1] +Pulling docker image ("alpine:non-existent-tag")... 
+ +--- + +[TestRun_Docker/Fake_alpine_image - 2] +Docker command exited with code ("/usr/bin/docker pull -q alpine:non-existent-tag"): 1 +STDERR: +> Error response from daemon: manifest for alpine:non-existent-tag not found: manifest unknown: manifest unknown +failed to run docker command + +--- + +[TestRun_Docker/Fake_image_entirely - 1] +Pulling docker image ("this-image-definitely-does-not-exist-abcde")... + +--- + +[TestRun_Docker/Fake_image_entirely - 2] +Docker command exited with code ("/usr/bin/docker pull -q this-image-definitely-does-not-exist-abcde"): 1 +STDERR: +> Error response from daemon: pull access denied for this-image-definitely-does-not-exist-abcde, repository does not exist or may require 'docker login': denied: requested access to the resource is denied +failed to run docker command + +--- + +[TestRun_Docker/Real_Alpine_image - 1] +Pulling docker image ("alpine:3.18.9")... +Saving docker image ("alpine:3.18.9") to temporary file... +Scanning image... +No issues found + +--- + +[TestRun_Docker/Real_Alpine_image - 2] + +--- + +[TestRun_Docker/Real_empty_image - 1] +Pulling docker image ("hello-world")... +Saving docker image ("hello-world") to temporary file... +Scanning image... + +--- + +[TestRun_Docker/Real_empty_image - 2] +No package sources found, --help for usage information. + +--- + +[TestRun_Docker/Real_empty_image_with_tag - 1] +Pulling docker image ("hello-world:linux")... +Saving docker image ("hello-world:linux") to temporary file... +Scanning image... + +--- + +[TestRun_Docker/Real_empty_image_with_tag - 2] +No package sources found, --help for usage information. 
+ +--- + [TestRun_GithubActions/scanning_osv-scanner_custom_format - 1] Scanned /fixtures/locks-insecure/osv-scanner-flutter-deps.json file as a osv-scanner and found 3 packages +--------------------------------+------+-----------+----------------------------+----------------------------+-------------------------------------------------------+ @@ -2270,7 +2332,7 @@ No issues found --- [TestRun_LockfileWithExplicitParseAs/empty_works_as_an_escape_(no_fixture_because_it's_not_valid_on_Windows) - 2] -open /path/to/my:file: no such file or directory +stat /path/to/my:file: no such file or directory --- @@ -2279,7 +2341,7 @@ open /path/to/my:file: no such file or directory --- [TestRun_LockfileWithExplicitParseAs/empty_works_as_an_escape_(no_fixture_because_it's_not_valid_on_Windows)#01 - 2] -open /path/to/my:project/package-lock.json: no such file or directory +stat /path/to/my:project/package-lock.json: no such file or directory --- @@ -2288,7 +2350,7 @@ open /path/to/my:project/package-lock.json: no such file or directory --- [TestRun_LockfileWithExplicitParseAs/files_that_error_on_parsing_stop_parsable_files_from_being_checked - 2] -(extracting as Cargo.lock) could not extract from /fixtures/locks-insecure/my-package-lock.json: toml: line 1: expected '.' or '=', but got '{' instead +(extracting as rust/Cargolock) could not extract from /fixtures/locks-insecure/my-package-lock.json: toml: line 1: expected '.' 
or '=', but got '{' instead --- @@ -2346,7 +2408,7 @@ No issues found --- [TestRun_LockfileWithExplicitParseAs/parse-as_takes_priority,_even_if_it's_wrong - 2] -(extracting as package-lock.json) could not extract from /fixtures/locks-many/yarn.lock: invalid character '#' looking for beginning of value +(extracting as javascript/packagelockjson) could not extract from "/fixtures/locks-many/yarn.lock": invalid character '#' looking for beginning of value --- @@ -2399,7 +2461,7 @@ No issues found --- -[TestRun_MavenTransitive/resolve_transitive_dependencies_with_native_datda_source - 1] +[TestRun_MavenTransitive/resolve_transitive_dependencies_with_native_data_source - 1] Scanned /fixtures/maven-transitive/registry.xml file as a pom.xml and found 59 packages +-------------------------------------+------+-----------+-----------------------------------------------+---------+----------------------------------------+ | OSV URL | CVSS | ECOSYSTEM | PACKAGE | VERSION | SOURCE | @@ -2413,7 +2475,7 @@ Scanned /fixtures/maven-transitive/registry.xml file as a pom.xml and f --- -[TestRun_MavenTransitive/resolve_transitive_dependencies_with_native_datda_source - 2] +[TestRun_MavenTransitive/resolve_transitive_dependencies_with_native_data_source - 2] --- @@ -2528,17 +2590,17 @@ Scanning image ../../internal/image/fixtures/test-node_modules-npm-empty.tar [TestRun_OCIImage/scanning_node_modules_using_npm_with_some_packages - 1] Scanning image ../../internal/image/fixtures/test-node_modules-npm-full.tar -+-------------------------------------+------+--------------+----------+------------+-------------------------------------------------------------------------------------------------------+ -| OSV URL | CVSS | ECOSYSTEM | PACKAGE | VERSION | SOURCE | -+-------------------------------------+------+--------------+----------+------------+-------------------------------------------------------------------------------------------------------+ -| https://osv.dev/CVE-2023-42363 | 5.5 | 
Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | -| https://osv.dev/CVE-2023-42364 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | -| https://osv.dev/CVE-2023-42365 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | -| https://osv.dev/CVE-2023-42366 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | -| https://osv.dev/GHSA-38f5-ghc2-fcmv | 9.8 | npm | cryo | 0.0.6 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/usr/app/node_modules/.package-lock.json | -| https://osv.dev/GHSA-vh95-rmgr-6w4m | 9.8 | npm | minimist | 0.0.8 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/usr/app/node_modules/.package-lock.json | -| https://osv.dev/GHSA-xvch-5gv4-984h | | | | | | -+-------------------------------------+------+--------------+----------+------------+-------------------------------------------------------------------------------------------------------+ ++-------------------------------------+------+--------------+----------+------------+--------------------------------------------------------------------------------------------------------+ +| OSV URL | CVSS | ECOSYSTEM | PACKAGE | VERSION | SOURCE | ++-------------------------------------+------+--------------+----------+------------+--------------------------------------------------------------------------------------------------------+ +| https://osv.dev/CVE-2023-42363 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | +| https://osv.dev/CVE-2023-42364 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | +| 
https://osv.dev/CVE-2023-42365 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | +| https://osv.dev/CVE-2023-42366 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | +| https://osv.dev/GHSA-38f5-ghc2-fcmv | 9.8 | npm | cryo | 0.0.6 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/prod/app/node_modules/.package-lock.json | +| https://osv.dev/GHSA-vh95-rmgr-6w4m | 9.8 | npm | minimist | 0.0.8 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/prod/app/node_modules/.package-lock.json | +| https://osv.dev/GHSA-xvch-5gv4-984h | | | | | | ++-------------------------------------+------+--------------+----------+------------+--------------------------------------------------------------------------------------------------------+ --- diff --git a/cmd/osv-scanner/fixtures/locks-requirements/my-requirements.txt b/cmd/osv-scanner/fixtures/locks-requirements/my-requirements.txt index 7e1060246fd..0e463a4d028 100644 --- a/cmd/osv-scanner/fixtures/locks-requirements/my-requirements.txt +++ b/cmd/osv-scanner/fixtures/locks-requirements/my-requirements.txt @@ -1 +1 @@ -flask +flask==1.0.0 diff --git a/cmd/osv-scanner/fixtures/locks-requirements/requirements-dev.txt b/cmd/osv-scanner/fixtures/locks-requirements/requirements-dev.txt index 7e66a17d49c..4fae28300e4 100644 --- a/cmd/osv-scanner/fixtures/locks-requirements/requirements-dev.txt +++ b/cmd/osv-scanner/fixtures/locks-requirements/requirements-dev.txt @@ -1 +1 @@ -black +black==1.0.0 diff --git a/cmd/osv-scanner/fixtures/locks-requirements/requirements.txt b/cmd/osv-scanner/fixtures/locks-requirements/requirements.txt index d0dae5a60f6..911f55bcf95 100644 --- a/cmd/osv-scanner/fixtures/locks-requirements/requirements.txt +++ b/cmd/osv-scanner/fixtures/locks-requirements/requirements.txt @@ -1,3 +1,3 @@ -flask -flask-cors +flask==1.0.0 
+flask-cors==1.0.0 pandas==0.23.4 diff --git a/cmd/osv-scanner/fixtures/locks-requirements/the_requirements_for_test.txt b/cmd/osv-scanner/fixtures/locks-requirements/the_requirements_for_test.txt index e079f8a6038..35663c020e6 100644 --- a/cmd/osv-scanner/fixtures/locks-requirements/the_requirements_for_test.txt +++ b/cmd/osv-scanner/fixtures/locks-requirements/the_requirements_for_test.txt @@ -1 +1 @@ -pytest +pytest==1.0.0 diff --git a/cmd/osv-scanner/fixtures/sbom-insecure/osv-scanner.toml b/cmd/osv-scanner/fixtures/sbom-insecure/osv-scanner.toml index 80e5b8b2ca3..4a3e9070b85 100644 --- a/cmd/osv-scanner/fixtures/sbom-insecure/osv-scanner.toml +++ b/cmd/osv-scanner/fixtures/sbom-insecure/osv-scanner.toml @@ -1,64 +1,3 @@ -[[IgnoredVulns]] -id = "GO-2022-0274" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "GO-2022-0493" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "GHSA-vpvm-3wq2-2wvm" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "GHSA-m8cg-xc2p-r3fc" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "GHSA-g2j6-57v7-gm8c" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "GHSA-f3fp-gc8g-vw66" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "DLA-3008-1" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "DLA-3012-1" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "DLA-3022-1" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "DLA-3051-1" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "CVE-2022-37434" -# 
ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "CVE-2018-25032" -# ignoreUntil = n/a -reason = "This is an intentionally vulnerable test sbom" - -[[IgnoredVulns]] -id = "GHSA-xr7r-f8xq-vfvv" -# ignoreUntil = n/a +[[PackageOverrides]] +ignore = true reason = "This is an intentionally vulnerable test sbom" diff --git a/cmd/osv-scanner/main.go b/cmd/osv-scanner/main.go index 595b1afe490..3c891f338cc 100644 --- a/cmd/osv-scanner/main.go +++ b/cmd/osv-scanner/main.go @@ -47,6 +47,18 @@ func run(args []string, stdout, stderr io.Writer) int { }, } + // If ExitErrHandler is not set, cli will use the default cli.HandleExitCoder. + // This is not ideal as cli.HandleExitCoder checks if the error implements cli.ExitCode interface. + // + // 99% of the time, this is fine, as we do not implement cli.ExitCode in our errors, so errors pass through + // that handler untouched. + // However, because of Go's duck typing, any error that happens to have a ExitCode() function + // (e.g. *exec.ExitError) will be assumed to implement cli.ExitCode interface and cause the program to exit + // early without proper error handling. + // + // This removes the handler entirely so that behavior will not unexpectedly happen. 
+ app.ExitErrHandler = func(_ *cli.Context, _ error) {} + args = insertDefaultCommand(args, app.Commands, app.DefaultCommand, stdout, stderr) if err := app.Run(args); err != nil { diff --git a/cmd/osv-scanner/main_test.go b/cmd/osv-scanner/main_test.go index a8c543ef1c9..26466c74867 100644 --- a/cmd/osv-scanner/main_test.go +++ b/cmd/osv-scanner/main_test.go @@ -7,6 +7,7 @@ import ( "os" "path/filepath" "reflect" + "runtime" "strings" "testing" @@ -517,7 +518,12 @@ func TestRun_LockfileWithExplicitParseAs(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - testCli(t, tt) + stdout, stderr := runCli(t, tt) + + testutility.NewSnapshot().MatchText(t, stdout) + testutility.NewSnapshot().WithWindowsReplacements(map[string]string{ + "CreateFile": "stat", + }).MatchText(t, stderr) }) } } @@ -728,6 +734,51 @@ func TestRun_Licenses(t *testing.T) { } } +func TestRun_Docker(t *testing.T) { + t.Parallel() + + testutility.SkipIfNotAcceptanceTesting(t, "Takes a long time to pull down images") + + tests := []cliTestCase{ + { + name: "Fake alpine image", + args: []string{"", "--docker", "alpine:non-existent-tag"}, + exit: 127, + }, + { + name: "Fake image entirely", + args: []string{"", "--docker", "this-image-definitely-does-not-exist-abcde"}, + exit: 127, + }, + // TODO: How to prevent these snapshots from changing constantly + { + name: "Real empty image", + args: []string{"", "--docker", "hello-world"}, + exit: 128, // No packages found + }, + { + name: "Real empty image with tag", + args: []string{"", "--docker", "hello-world:linux"}, + exit: 128, // No package found + }, + { + name: "Real Alpine image", + args: []string{"", "--docker", "alpine:3.18.9"}, + exit: 0, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + // Only test on linux, and mac/windows CI/CD does not come with docker preinstalled + if runtime.GOOS == "linux" { + testCli(t, tt) + } + }) + } +} + func TestRun_OCIImage(t *testing.T) { t.Parallel() @@ 
-923,7 +974,7 @@ func TestRun_MavenTransitive(t *testing.T) { exit: 1, }, { - name: "resolve transitive dependencies with native datda source", + name: "resolve transitive dependencies with native data source", args: []string{"", "--config=./fixtures/osv-scanner-empty-config.toml", "--experimental-resolution-data-source=native", "-L", "pom.xml:./fixtures/maven-transitive/registry.xml"}, exit: 1, }, diff --git a/cmd/osv-scanner/scan/main.go b/cmd/osv-scanner/scan/main.go index ffa5281205b..0e455b886d6 100644 --- a/cmd/osv-scanner/scan/main.go +++ b/cmd/osv-scanner/scan/main.go @@ -31,10 +31,10 @@ func Command(stdout, stderr io.Writer, r *reporter.Reporter) *cli.Command { Usage: "scans various mediums for dependencies and matches it against the OSV database", Description: "scans various mediums for dependencies and matches it against the OSV database", Flags: []cli.Flag{ - &cli.StringSliceFlag{ + &cli.StringFlag{ Name: "docker", Aliases: []string{"D"}, - Usage: "scan docker image with this name. Warning: Only run this on a trusted container image, as it runs the container image to retrieve the package versions", + Usage: "scan docker image with this name. 
This is a convenience function which runs `docker save` before scanning the saved image using --oci-image", TakesFile: false, }, &cli.StringSliceFlag{ @@ -258,15 +258,15 @@ func action(context *cli.Context, stdout, stderr io.Writer) (reporter.Reporter, } vulnResult, err := osvscanner.DoScan(osvscanner.ScannerActions{ - LockfilePaths: context.StringSlice("lockfile"), - SBOMPaths: context.StringSlice("sbom"), - DockerContainerNames: context.StringSlice("docker"), - Recursive: context.Bool("recursive"), - SkipGit: context.Bool("skip-git"), - NoIgnore: context.Bool("no-ignore"), - ConfigOverridePath: context.String("config"), - DirectoryPaths: context.Args().Slice(), - CallAnalysisStates: callAnalysisStates, + LockfilePaths: context.StringSlice("lockfile"), + SBOMPaths: context.StringSlice("sbom"), + DockerImageName: context.String("docker"), + Recursive: context.Bool("recursive"), + SkipGit: context.Bool("skip-git"), + NoIgnore: context.Bool("no-ignore"), + ConfigOverridePath: context.String("config"), + DirectoryPaths: context.Args().Slice(), + CallAnalysisStates: callAnalysisStates, ExperimentalScannerActions: osvscanner.ExperimentalScannerActions{ LocalDBPath: context.String("experimental-local-db-path"), DownloadDatabases: context.Bool("experimental-download-offline-databases"), diff --git a/go.mod b/go.mod index caf5f92b7bf..bda7287ae98 100644 --- a/go.mod +++ b/go.mod @@ -13,12 +13,12 @@ require ( github.com/charmbracelet/bubbletea v1.1.1 github.com/charmbracelet/glamour v0.8.0 github.com/charmbracelet/lipgloss v0.13.0 - github.com/dghubble/trie v0.1.0 github.com/gkampitakis/go-snaps v0.5.7 github.com/go-git/go-billy/v5 v5.5.0 github.com/go-git/go-git/v5 v5.12.0 github.com/google/go-cmp v0.6.0 github.com/google/go-containerregistry v0.20.2 + github.com/google/osv-scalibr v0.1.4-0.20241031120023-761ca671aacb github.com/ianlancetaylor/demangle v0.0.0-20240912202439-0a2b6291aafd github.com/jedib0t/go-pretty/v6 v6.6.0 github.com/muesli/reflow v0.3.0 @@ -44,7 +44,7 
@@ require ( require ( dario.cat/mergo v1.0.0 // indirect - github.com/Microsoft/go-winio v0.6.1 // indirect + github.com/Microsoft/go-winio v0.6.2 // indirect github.com/ProtonMail/go-crypto v1.0.0 // indirect github.com/alecthomas/chroma/v2 v2.14.0 // indirect github.com/anchore/go-struct-converter v0.0.0-20230627203149-c72ef8859ca9 // indirect @@ -58,8 +58,6 @@ require ( github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect github.com/cyphar/filepath-securejoin v0.2.4 // indirect github.com/dlclark/regexp2 v1.11.0 // indirect - github.com/docker/distribution v2.8.3+incompatible // indirect - github.com/docker/docker-credential-helpers v0.8.1 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect github.com/gkampitakis/ciinfo v0.3.0 // indirect @@ -82,14 +80,13 @@ require ( github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/termenv v0.15.3-0.20240618155329-98d742f6907a // indirect github.com/opencontainers/go-digest v1.0.0 // indirect - github.com/opencontainers/image-spec v1.1.0-rc3 // indirect + github.com/opencontainers/image-spec v1.1.0 // indirect github.com/pjbgf/sha1cd v0.3.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/rogpeppe/go-internal v1.12.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sahilm/fuzzy v0.1.1 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect - github.com/sirupsen/logrus v1.9.3 // indirect github.com/skeema/knownhosts v1.2.2 // indirect github.com/spdx/gordf v0.0.0-20221230105357-b735bd5aac89 // indirect github.com/tidwall/match v1.1.1 // indirect @@ -106,4 +103,5 @@ require ( google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20241007155032-5fefd90f89a9 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/go.sum 
b/go.sum index 6c92190e560..c91d66884d2 100644 --- a/go.sum +++ b/go.sum @@ -13,8 +13,8 @@ github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2 github.com/CycloneDX/cyclonedx-go v0.9.1 h1:yffaWOZsv77oTJa/SdVZYdgAgFioCeycBUKkqS2qzQM= github.com/CycloneDX/cyclonedx-go v0.9.1/go.mod h1:NE/EWvzELOFlG6+ljX/QeMlVt9VKcTwu8u0ccsACEsw= github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY= -github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= -github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/ProtonMail/go-crypto v1.0.0 h1:LRuvITjQWX+WIfr930YHG2HNfjR1uOfyf5vE0kC2U78= github.com/ProtonMail/go-crypto v1.0.0/go.mod h1:EjAoLdwvbIOoOQr3ihjnSoLZRtE8azugULFRteWMNc0= github.com/alecthomas/assert/v2 v2.7.0 h1:QtqSACNS3tF7oasA8CU6A6sXZSBDqnm7RfpLl9bZqbE= @@ -69,8 +69,6 @@ github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxG github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dghubble/trie v0.1.0 h1:kJnjBLFFElBwS60N4tkPvnLhnpcDxbBjIulgI8CpNGM= -github.com/dghubble/trie v0.1.0/go.mod h1:sOmnzfBNH7H92ow2292dDFWNsVQuh/izuD7otCYb1ak= github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/docker/cli v27.1.1+incompatible h1:goaZxOqs4QKxznZjjBWKONQci/MywhtRv2oNn0GkeZE= @@ -111,6 +109,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp 
v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-containerregistry v0.20.2 h1:B1wPJ1SN/S7pB+ZAimcciVD+r+yV/l/DSArMxlbwseo= github.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8= +github.com/google/osv-scalibr v0.1.4-0.20241031120023-761ca671aacb h1:A7IvUJk8r3wMuuAMWxwbkE3WBp+oF/v7CcEt3nCy+lI= +github.com/google/osv-scalibr v0.1.4-0.20241031120023-761ca671aacb/go.mod h1:MbEYB+PKqEGjwMdpcoO5DWpi0+57jYgYcw2jlRy8O9Q= github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= @@ -161,8 +161,8 @@ github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.1.0-rc3 h1:fzg1mXZFj8YdPeNkRXMg+zb88BFV0Ys52cJydRwBkb8= -github.com/opencontainers/image-spec v1.1.0-rc3/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= +github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= +github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/owenrumney/go-sarif v1.1.1/go.mod h1:dNDiPlF04ESR/6fHlPyq7gHKmrM0sHUvAGjsoh8ZH0U= github.com/owenrumney/go-sarif/v2 v2.3.3 h1:ubWDJcF5i3L/EIOER+ZyQ03IfplbSU1BLOE26uKQIIU= github.com/owenrumney/go-sarif/v2 v2.3.3/go.mod h1:MSqMMx9WqlBSY7pXoOZWgEsVB4FDNfhcaXDA1j6Sr+w= @@ -342,4 +342,5 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/internal/image/__snapshots__/image_test.snap b/internal/image/__snapshots__/image_test.snap index 9d957ad396f..58b8b548972 100755 --- a/internal/image/__snapshots__/image_test.snap +++ b/internal/image/__snapshots__/image_test.snap @@ -4,7 +4,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -186,7 +186,7 @@ "Lockfiles": [ { "filePath": "/go/bin/more-vuln-overwrite-less-vuln", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -214,7 +214,7 @@ }, { "filePath": "/go/bin/ptf-1.2.0", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -242,7 +242,7 @@ }, { "filePath": "/go/bin/ptf-1.3.0", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -270,7 +270,7 @@ }, { "filePath": "/go/bin/ptf-1.3.0-moved", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -298,7 +298,7 @@ }, { "filePath": "/go/bin/ptf-1.4.0", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -326,7 +326,7 @@ }, { "filePath": "/go/bin/ptf-vulnerable", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -354,7 +354,7 @@ }, { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -536,7 +536,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - 
"parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -754,7 +754,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -963,8 +963,8 @@ ] }, { - "filePath": "/usr/app/node_modules/.package-lock.json", - "parsedAs": "node_modules", + "filePath": "/prod/app/node_modules/.package-lock.json", + "parsedAs": "javascript/nodemodules", "packages": [ { "name": "cryo", @@ -1011,7 +1011,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -1229,7 +1229,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -1447,7 +1447,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -1665,7 +1665,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", diff --git a/internal/image/extractor.go b/internal/image/extractor.go index 6ddb7f9f169..18dad0ed635 100644 --- a/internal/image/extractor.go +++ b/internal/image/extractor.go @@ -1,57 +1,79 @@ package image import ( + "context" "errors" "fmt" - "os" - "path" - "sort" - + "io/fs" + "strings" + + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/language/golang/gobinary" + "github.com/google/osv-scalibr/extractor/filesystem/os/apk" + "github.com/google/osv-scalibr/extractor/filesystem/os/dpkg" + "github.com/google/osv-scanner/internal/lockfilescalibr" + "github.com/google/osv-scanner/internal/lockfilescalibr/language/javascript/nodemodules" 
"github.com/google/osv-scanner/pkg/lockfile" ) // artifactExtractors contains only extractors for artifacts that are important in // the final layer of a container image -var artifactExtractors map[string]lockfile.Extractor = map[string]lockfile.Extractor{ - "node_modules": lockfile.NodeModulesExtractor{}, - "apk-installed": lockfile.ApkInstalledExtractor{}, - "dpkg": lockfile.DpkgStatusExtractor{}, - "go-binary": lockfile.GoBinaryExtractor{}, -} - -type extractorPair struct { - extractor lockfile.Extractor - name string +var artifactExtractors []filesystem.Extractor = []filesystem.Extractor{ + // TODO: Using nodemodules extractor to minimize changes of snapshots + // After annotations are added, we should switch to using packagejson. + // packagejson.New(packagejson.DefaultConfig()), + nodemodules.Extractor{}, + + apk.New(apk.DefaultConfig()), + gobinary.New(gobinary.DefaultConfig()), + // TODO: Add tests for debian containers + dpkg.New(dpkg.DefaultConfig()), } -func findArtifactExtractor(path string) []extractorPair { +func findArtifactExtractor(path string, fileInfo fs.FileInfo) []filesystem.Extractor { // Use ShouldExtract to collect and return a slice of artifactExtractors - var extractors []extractorPair - for name, extractor := range artifactExtractors { - if extractor.ShouldExtract(path) { - extractors = append(extractors, extractorPair{extractor, name}) + var extractors []filesystem.Extractor + for _, extractor := range artifactExtractors { + if extractor.FileRequired(path, fileInfo) { + extractors = append(extractors, extractor) } } return extractors } -func extractArtifactDeps(path string, layer *Layer) (lockfile.Lockfile, error) { - foundExtractors := findArtifactExtractor(path) +// Note: Output is non deterministic +func extractArtifactDeps(extractPath string, layer *Layer) ([]*extractor.Inventory, error) { + pathFileInfo, err := layer.Stat(extractPath) + if err != nil { + return nil, fmt.Errorf("attempted to get FileInfo but failed: %w", err) + } + + 
scalibrPath := strings.TrimPrefix(extractPath, "/") + foundExtractors := findArtifactExtractor(scalibrPath, pathFileInfo) if len(foundExtractors) == 0 { - return lockfile.Lockfile{}, fmt.Errorf("%w for %s", lockfile.ErrExtractorNotFound, path) + return nil, fmt.Errorf("%w for %s", lockfilescalibr.ErrExtractorNotFound, extractPath) } - packages := []lockfile.PackageDetails{} + inventories := []*extractor.Inventory{} var extractedAs string - for _, extPair := range foundExtractors { + for _, extractor := range foundExtractors { // File has to be reopened per extractor as each extractor moves the read cursor - f, err := OpenLayerFile(path, layer) + f, err := layer.Open(extractPath) if err != nil { - return lockfile.Lockfile{}, fmt.Errorf("attempted to open file but failed: %w", err) + return nil, fmt.Errorf("attempted to open file but failed: %w", err) + } + + scanInput := &filesystem.ScanInput{ + FS: layer, + Path: scalibrPath, + Root: "/", + Reader: f, + Info: pathFileInfo, } - newPackages, err := extPair.extractor.Extract(f) + newPackages, err := extractor.Extract(context.Background(), scanInput) f.Close() if err != nil { @@ -59,76 +81,33 @@ func extractArtifactDeps(path string, layer *Layer) (lockfile.Lockfile, error) { continue } - return lockfile.Lockfile{}, fmt.Errorf("(extracting as %s) %w", extPair.name, err) + return nil, fmt.Errorf("(extracting as %s) %w", extractor.Name(), err) } - extractedAs = extPair.name - packages = newPackages - // TODO(rexpan): Determine if it's acceptable to have multiple extractors + for i := range newPackages { + newPackages[i].Extractor = extractor + } + + extractedAs = extractor.Name() + inventories = newPackages + // TODO(rexpan): Determine if it's acceptable to have multiple extractors // extract from the same file successfully break } if extractedAs == "" { - return lockfile.Lockfile{}, fmt.Errorf("%w for %s",
lockfilescalibr.ErrExtractorNotFound, extractPath) } - // Sort to have deterministic output, and to match behavior of lockfile.extractDeps - sort.Slice(packages, func(i, j int) bool { - if packages[i].Name == packages[j].Name { - return packages[i].Version < packages[j].Version + // Perform any one-off translations here + for _, inv := range inventories { + // Scalibr uses go to indicate go compiler version + // We specifically cares about the stdlib version inside the package + // so convert the package name from go to stdlib + if inv.Ecosystem() == "Go" && inv.Name == "go" { + inv.Name = "stdlib" } - - return packages[i].Name < packages[j].Name - }) - - return lockfile.Lockfile{ - FilePath: path, - ParsedAs: extractedAs, - Packages: packages, - }, nil -} - -// A File represents a file that exists in an image -type File struct { - *os.File - - layer *Layer - path string -} - -func (f File) Open(openPath string) (lockfile.NestedDepFile, error) { - // use path instead of filepath, because container is always in Unix paths (for now) - if path.IsAbs(openPath) { - return OpenLayerFile(openPath, f.layer) - } - - absPath := path.Join(f.path, openPath) - - return OpenLayerFile(absPath, f.layer) -} - -func (f File) Path() string { - return f.path -} - -func OpenLayerFile(path string, layer *Layer) (File, error) { - fileNode, err := layer.getFileNode(path) - if err != nil { - return File{}, err } - file, err := fileNode.Open() - if err != nil { - return File{}, err - } - - return File{ - File: file, - path: path, - layer: layer, - }, nil + return inventories, nil } - -var _ lockfile.DepFile = File{} -var _ lockfile.NestedDepFile = File{} diff --git a/internal/image/fixtures/alpine-3.19-alpine-release b/internal/image/fixtures/alpine-3.18-alpine-release similarity index 100% rename from internal/image/fixtures/alpine-3.19-alpine-release rename to internal/image/fixtures/alpine-3.18-alpine-release diff --git a/internal/image/fixtures/alpine-3.18-os-release 
b/internal/image/fixtures/alpine-3.18-os-release new file mode 100644 index 00000000000..ffb92a8cd41 --- /dev/null +++ b/internal/image/fixtures/alpine-3.18-os-release @@ -0,0 +1,7 @@ +/ # cat /etc/os-release +NAME="Alpine Linux" +ID=alpine +VERSION_ID=3.18.1 +PRETTY_NAME="Alpine Linux v3.18" +HOME_URL="https://alpinelinux.org/" +BUG_REPORT_URL="https://gitlab.alpinelinux.org/alpine/aports/-/issues" diff --git a/internal/image/fixtures/test-alpine.Dockerfile b/internal/image/fixtures/test-alpine.Dockerfile index 5cf22e28124..d6aa79f1c81 100644 --- a/internal/image/fixtures/test-alpine.Dockerfile +++ b/internal/image/fixtures/test-alpine.Dockerfile @@ -1,4 +1,5 @@ FROM alpine:3.10@sha256:451eee8bedcb2f029756dc3e9d73bab0e7943c1ac55cff3a4861c52a0fdd3e98 -# Switch the version to 3.19 to show the advisories published for the latest alpine versions -COPY "alpine-3.19-alpine-release" "/etc/alpine-release" +# Switch the version to 3.18 to show the advisories published for the latest alpine versions +COPY "alpine-3.18-alpine-release" "/etc/alpine-release" +COPY "alpine-3.18-os-release" "/etc/os-release" diff --git a/internal/image/fixtures/test-node_modules-npm-empty.Dockerfile b/internal/image/fixtures/test-node_modules-npm-empty.Dockerfile index aa559ba2850..67ff3b79f70 100644 --- a/internal/image/fixtures/test-node_modules-npm-empty.Dockerfile +++ b/internal/image/fixtures/test-node_modules-npm-empty.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="10.2.4" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "npm@$MANAGER_VERSION" diff --git a/internal/image/fixtures/test-node_modules-npm-full.Dockerfile b/internal/image/fixtures/test-node_modules-npm-full.Dockerfile index df412b7a124..96e136b5f7f 100644 --- a/internal/image/fixtures/test-node_modules-npm-full.Dockerfile +++ b/internal/image/fixtures/test-node_modules-npm-full.Dockerfile @@ -2,7 
+2,7 @@ ARG MANAGER_VERSION="10.2.4" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "npm@$MANAGER_VERSION" diff --git a/internal/image/fixtures/test-node_modules-pnpm-empty.Dockerfile b/internal/image/fixtures/test-node_modules-pnpm-empty.Dockerfile index 8912eef5d07..7a221ca7ea9 100644 --- a/internal/image/fixtures/test-node_modules-pnpm-empty.Dockerfile +++ b/internal/image/fixtures/test-node_modules-pnpm-empty.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="8.15.4" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "pnpm@$MANAGER_VERSION" diff --git a/internal/image/fixtures/test-node_modules-pnpm-full.Dockerfile b/internal/image/fixtures/test-node_modules-pnpm-full.Dockerfile index 97a37c652a0..80e1ee6519c 100644 --- a/internal/image/fixtures/test-node_modules-pnpm-full.Dockerfile +++ b/internal/image/fixtures/test-node_modules-pnpm-full.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="8.15.4" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "pnpm@$MANAGER_VERSION" diff --git a/internal/image/fixtures/test-node_modules-yarn-empty.Dockerfile b/internal/image/fixtures/test-node_modules-yarn-empty.Dockerfile index 7158d5d2584..41f4c2f4239 100644 --- a/internal/image/fixtures/test-node_modules-yarn-empty.Dockerfile +++ b/internal/image/fixtures/test-node_modules-yarn-empty.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="1.22.22" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "yarn@$MANAGER_VERSION" --force diff --git 
a/internal/image/fixtures/test-node_modules-yarn-full.Dockerfile b/internal/image/fixtures/test-node_modules-yarn-full.Dockerfile index 54889d6804b..99e9653f01d 100644 --- a/internal/image/fixtures/test-node_modules-yarn-full.Dockerfile +++ b/internal/image/fixtures/test-node_modules-yarn-full.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="1.22.22" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "yarn@$MANAGER_VERSION" --force diff --git a/internal/image/image.go b/internal/image/image.go index be3bd3171ef..0be6f53bf23 100644 --- a/internal/image/image.go +++ b/internal/image/image.go @@ -11,9 +11,9 @@ import ( "path/filepath" "strings" - "github.com/dghubble/trie" v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/tarball" + "github.com/google/osv-scanner/internal/image/pathtree" "github.com/google/osv-scanner/pkg/lockfile" ) @@ -112,7 +112,7 @@ func LoadImage(imagePath string) (*Image, error) { } outputImage.layers[i] = Layer{ - fileNodeTrie: trie.NewPathTrie(), + fileNodeTrie: pathtree.NewNode[FileNode](), id: hash.Hex, rootImage: &outputImage, } @@ -235,7 +235,7 @@ func LoadImage(imagePath string) (*Image, error) { continue } - currentMap.fileNodeTrie.Put(virtualPath, FileNode{ + err := currentMap.fileNodeTrie.Insert(virtualPath, &FileNode{ rootImage: &outputImage, // Select the original layer of the file originLayer: &outputImage.layers[i], @@ -244,6 +244,10 @@ func LoadImage(imagePath string) (*Image, error) { isWhiteout: tombstone, permission: fs.FileMode(header.Mode), //nolint:gosec }) + + if err != nil { + return &outputImage, fmt.Errorf("image tar has repeated files: %w", err) + } } } @@ -260,13 +264,12 @@ func inWhiteoutDir(fileMap Layer, filePath string) bool { if filePath == "" { break } - dirname := filepath.Dir(filePath) + dirname := path.Dir(filePath) if filePath == 
dirname { break } - val := fileMap.fileNodeTrie.Get(dirname) - item, ok := val.(FileNode) - if ok && item.isWhiteout { + node := fileMap.fileNodeTrie.Get(dirname) + if node != nil && node.isWhiteout { return true } filePath = dirname diff --git a/internal/image/image_test.go b/internal/image/image_test.go index 90bd0285249..bc4397ab4e0 100644 --- a/internal/image/image_test.go +++ b/internal/image/image_test.go @@ -3,7 +3,6 @@ package image_test import ( "errors" "os" - "sort" "testing" "github.com/google/osv-scanner/internal/image" @@ -94,10 +93,6 @@ func TestScanImage(t *testing.T) { } } - sort.Slice(got.Lockfiles, func(i, j int) bool { - return got.Lockfiles[i].FilePath < got.Lockfiles[j].FilePath - }) - tt.want.MatchJSON(t, got) }) } diff --git a/internal/image/layer.go b/internal/image/layer.go index 9e100dc03f7..eb66a37752b 100644 --- a/internal/image/layer.go +++ b/internal/image/layer.go @@ -3,9 +3,14 @@ package image import ( "io/fs" "os" + "strings" + "time" + + // Note that paths accessing the disk must use filepath, but all virtual paths should use path + "path" "path/filepath" - "github.com/dghubble/trie" + "github.com/google/osv-scanner/internal/image/pathtree" ) type fileType int @@ -26,6 +31,69 @@ type FileNode struct { permission fs.FileMode } +var _ fs.DirEntry = FileNode{} + +func (f FileNode) IsDir() bool { + return f.fileType == Dir +} + +func (f FileNode) Name() string { + return path.Base(f.virtualPath) +} + +func (f FileNode) Type() fs.FileMode { + return f.permission +} + +func (f FileNode) Info() (fs.FileInfo, error) { + return f.Stat() +} + +type FileNodeFileInfo struct { + baseFileInfo fs.FileInfo + fileNode *FileNode +} + +var _ fs.FileInfo = FileNodeFileInfo{} + +func (f FileNodeFileInfo) Name() string { + return path.Base(f.fileNode.virtualPath) +} + +func (f FileNodeFileInfo) Size() int64 { + return f.baseFileInfo.Size() +} + +func (f FileNodeFileInfo) Mode() fs.FileMode { + return f.fileNode.permission +} + +func (f 
FileNodeFileInfo) ModTime() time.Time { + return f.baseFileInfo.ModTime() +} + +func (f FileNodeFileInfo) IsDir() bool { + return f.fileNode.fileType == Dir +} + +func (f FileNodeFileInfo) Sys() any { + return nil +} + +// Stat returns the FileInfo structure describing file. +func (f *FileNode) Stat() (fs.FileInfo, error) { + baseFileInfo, err := os.Stat(f.absoluteDiskPath()) + if err != nil { + return nil, err + } + + return FileNodeFileInfo{ + baseFileInfo: baseFileInfo, + fileNode: f, + }, nil +} + +// Open returns a file handle for the file func (f *FileNode) Open() (*os.File, error) { if f.isWhiteout { return nil, fs.ErrNotExist @@ -42,35 +110,74 @@ func (f *FileNode) absoluteDiskPath() string { type Layer struct { // id is the sha256 digest of the layer id string - fileNodeTrie *trie.PathTrie + fileNodeTrie *pathtree.Node[FileNode] rootImage *Image // TODO: Use hashmap to speed up path lookups } -func (filemap Layer) getFileNode(path string) (FileNode, error) { - node, ok := filemap.fileNodeTrie.Get(path).(FileNode) - if !ok { - return FileNode{}, fs.ErrNotExist +func (filemap Layer) Open(path string) (fs.File, error) { + node, err := filemap.getFileNode(path) + if err != nil { + return nil, err + } + + return node.Open() +} + +func (filemap Layer) Stat(path string) (fs.FileInfo, error) { + node, err := filemap.getFileNode(path) + if err != nil { + return nil, err + } + + return node.Stat() +} + +func (filemap Layer) ReadDir(path string) ([]fs.DirEntry, error) { + children := filemap.fileNodeTrie.GetChildren(path) + output := make([]fs.DirEntry, 0, len(children)) + for _, node := range children { + output = append(output, node) + } + + return output, nil +} + +var _ fs.FS = Layer{} +var _ fs.StatFS = Layer{} +var _ fs.ReadDirFS = Layer{} + +func (filemap Layer) getFileNode(nodePath string) (*FileNode, error) { + // We expect all paths queried to be absolute paths rooted at the container root + // However, scalibr uses paths without a prepending /, because the 
paths are relative to Root. + // Root will always be '/' for container scanning, so prepend with / if necessary. + if !strings.HasPrefix(nodePath, "/") { + nodePath = path.Join("/", nodePath) + } + + node := filemap.fileNodeTrie.Get(nodePath) + if node == nil { + return nil, fs.ErrNotExist } return node, nil } // AllFiles return all files that exist on the layer the FileMap is representing -func (filemap Layer) AllFiles() []FileNode { - allFiles := []FileNode{} +func (filemap Layer) AllFiles() []*FileNode { + allFiles := []*FileNode{} // No need to check error since we are not returning any errors - _ = filemap.fileNodeTrie.Walk(func(_ string, value interface{}) error { - node := value.(FileNode) + _ = filemap.fileNodeTrie.Walk(func(_ string, node *FileNode) error { if node.fileType != RegularFile { // Only add regular files return nil } + // TODO: Check if parent is an opaque whiteout if node.isWhiteout { // Don't add whiteout files as they have been deleted return nil } - allFiles = append(allFiles, value.(FileNode)) + allFiles = append(allFiles, node) return nil }) diff --git a/internal/image/pathtree/pathtree.go b/internal/image/pathtree/pathtree.go new file mode 100644 index 00000000000..d14666a5a13 --- /dev/null +++ b/internal/image/pathtree/pathtree.go @@ -0,0 +1,133 @@ +// Package pathtree provides a tree structure for representing file paths. +// Each path segment is a node in the tree, enabling efficient storage +// and retrieval for building virtual file systems. +package pathtree + +import ( + "errors" + "fmt" + "strings" +) + +const divider string = "/" + +var ErrNodeAlreadyExists = errors.New("node already exists") + +// Root node represents the root directory / +type Node[V any] struct { + value *V + children map[string]*Node[V] +} + +func NewNode[V any]() *Node[V] { + return &Node[V]{ + children: make(map[string]*Node[V]), + } +} + +// Insert inserts a value into the tree at the given path. 
+// If a node already exists at the given path, an error is returned. +// +// If a file is inserted without also inserting the parent directory +// the parent directory entry will have a nil value. +func (node *Node[V]) Insert(path string, value *V) error { + path, err := cleanPath(path) + if err != nil { + return fmt.Errorf("Insert() error: %w", err) + } + + cursor := node + for _, segment := range strings.Split(path, divider) { + next, ok := cursor.children[segment] + // Create the segment if it doesn't exist + if !ok { + next = &Node[V]{ + value: nil, + children: make(map[string]*Node[V]), + } + cursor.children[segment] = next + } + cursor = next + } + + if cursor.value != nil { + return fmt.Errorf("%w: %v", ErrNodeAlreadyExists, divider+path) + } + + cursor.value = value + + return nil +} + +// Get retrieves the value at the given path. +// If no node exists at the given path, nil is returned. +func (node *Node[V]) Get(path string) *V { + path, _ = cleanPath(path) + + cursor := node + for _, segment := range strings.Split(path, divider) { + next, ok := cursor.children[segment] + if !ok { + return nil + } + cursor = next + } + + return cursor.value +} + +// Get retrieves all the direct children of this given path +func (node *Node[V]) GetChildren(path string) []*V { + path, _ = cleanPath(path) + + cursor := node + for _, segment := range strings.Split(path, divider) { + next, ok := cursor.children[segment] + if !ok { + return nil + } + cursor = next + } + + var children = make([]*V, 0, len(cursor.children)) + for _, child := range cursor.children { + // Some entries could be nil if a file is inserted without inserting the + // parent directories. 
+ if child != nil { + children = append(children, child.value) + } + } + + return children +} + +// cleanPath returns a path for use in the tree +// additionally an error is returned if path is not formatted as expected +func cleanPath(inputPath string) (string, error) { + path, found := strings.CutPrefix(inputPath, divider) + if !found { + return "", fmt.Errorf("path %q is not an absolute path", inputPath) + } + path = strings.TrimSuffix(path, "/") + + return path, nil +} + +// Walk walks through all elements of this tree depths first, calling fn at every node +func (node *Node[V]) Walk(fn func(string, *V) error) error { + return node.walk("/", fn) +} + +func (node *Node[V]) walk(path string, fn func(string, *V) error) error { + for key, node := range node.children { + if err := fn(key, node.value); err != nil { + return err + } + err := node.walk(path+divider+key, fn) + if err != nil { + return err + } + } + + return nil +} diff --git a/internal/image/pathtree/pathtree_test.go b/internal/image/pathtree/pathtree_test.go new file mode 100644 index 00000000000..556c97545a8 --- /dev/null +++ b/internal/image/pathtree/pathtree_test.go @@ -0,0 +1,264 @@ +package pathtree_test + +import ( + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/google/osv-scanner/internal/image/pathtree" +) + +type testVal struct { + string +} + +func assertNoError(t *testing.T, err error) { + t.Helper() + + if err != nil { + t.Errorf("%v", err) + } +} + +func testTree(t *testing.T) *pathtree.Node[testVal] { + t.Helper() + + tree := pathtree.NewNode[testVal]() + assertNoError(t, tree.Insert("/a", &testVal{"value1"})) + assertNoError(t, tree.Insert("/a/b", &testVal{"value2"})) + assertNoError(t, tree.Insert("/a/b/c", &testVal{"value3"})) + assertNoError(t, tree.Insert("/a/b/d", &testVal{"value4"})) + assertNoError(t, tree.Insert("/a/e", &testVal{"value5"})) + assertNoError(t, tree.Insert("/a/e/f", &testVal{"value6"})) + 
assertNoError(t, tree.Insert("/a/b/d/f", &testVal{"value7"})) + assertNoError(t, tree.Insert("/a/g", &testVal{"value8"})) + + return tree +} + +func TestNode_Insert_Error(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + tree *pathtree.Node[testVal] + key string + val *testVal + }{ + { + name: "duplicate node", + tree: func() *pathtree.Node[testVal] { + tree := pathtree.NewNode[testVal]() + _ = tree.Insert("/a", &testVal{"value1"}) + + return tree + }(), + key: "/a", + val: &testVal{"value2"}, + }, + { + name: "duplicate node in subtree", + tree: func() *pathtree.Node[testVal] { + tree := pathtree.NewNode[testVal]() + _ = tree.Insert("/a", &testVal{"value1"}) + _ = tree.Insert("/a/b", &testVal{"value2"}) + + return tree + }(), + key: "/a/b", + val: &testVal{"value3"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + err := tt.tree.Insert(tt.key, tt.val) + if err == nil { + t.Errorf("Node.Insert() expected error, got nil") + } + }) + } +} + +func TestNode_Get(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + tree *pathtree.Node[testVal] + key string + want *testVal + }{ + { + name: "empty tree", + tree: pathtree.NewNode[testVal](), + key: "/a", + want: nil, + }, + { + name: "single node", + tree: func() *pathtree.Node[testVal] { + tree := pathtree.NewNode[testVal]() + _ = tree.Insert("/a", &testVal{"value"}) + + return tree + }(), + key: "/a", + want: &testVal{"value"}, + }, + { + name: "non-existent node in single node tree", + tree: func() *pathtree.Node[testVal] { + tree := pathtree.NewNode[testVal]() + _ = tree.Insert("/a", &testVal{"value"}) + + return tree + }(), + key: "/b", + want: nil, + }, + { + name: "multiple nodes", + tree: testTree(t), + key: "/a/b/c", + want: &testVal{"value3"}, + }, + { + name: "non-existent node", + tree: testTree(t), + key: "/a/b/g", + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got 
:= tt.tree.Get(tt.key) + if diff := cmp.Diff(tt.want, got, cmp.AllowUnexported(testVal{})); diff != "" { + t.Errorf("Node.Get() (-want +got): %v", diff) + } + }) + } +} + +func TestNode_GetChildren(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + tree *pathtree.Node[testVal] + key string + want []*testVal + }{ + { + name: "empty tree", + tree: pathtree.NewNode[testVal](), + key: "/a", + want: nil, + }, + { + name: "single node no children", + tree: func() *pathtree.Node[testVal] { + tree := pathtree.NewNode[testVal]() + _ = tree.Insert("/a", &testVal{"value"}) + + return tree + }(), + key: "/a", + want: []*testVal{}, + }, + { + name: "multiple nodes with children", + tree: testTree(t), + key: "/a/b", + want: []*testVal{ + {"value3"}, + {"value4"}, + }, + }, + { + name: "non-existent node", + tree: testTree(t), + key: "/a/b/g", + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := tt.tree.GetChildren(tt.key) + if diff := cmp.Diff( + tt.want, + got, + cmp.AllowUnexported(testVal{}), + cmpopts.SortSlices(func(a, b *testVal) bool { + return strings.Compare(a.string, b.string) < 0 + })); diff != "" { + t.Errorf("Node.GetChildren() (-want +got): %v", diff) + } + }) + } +} + +func TestNode_Walk(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + tree *pathtree.Node[testVal] + want []string + }{ + { + name: "empty tree", + tree: pathtree.NewNode[testVal](), + want: []string{}, + }, + { + name: "single node", + tree: func() *pathtree.Node[testVal] { + tree := pathtree.NewNode[testVal]() + _ = tree.Insert("/a", &testVal{"value"}) + + return tree + }(), + want: []string{"value"}, + }, + { + name: "multiple nodes", + tree: testTree(t), + want: []string{ + "value1", + "value2", + "value3", + "value4", + "value5", + "value6", + "value7", + "value8", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := []string{} + err := 
tt.tree.Walk(func(_ string, node *testVal) error { + got = append(got, node.string) + return nil + }) + if err != nil { + t.Errorf("Node.Walk() error = %v", err) + } + if diff := cmp.Diff(tt.want, got, cmpopts.SortSlices(func(a, b string) bool { + return strings.Compare(a, b) < 0 + })); diff != "" { + t.Errorf("Node.Walk() (-want +got): %v", diff) + } + }) + } +} diff --git a/internal/image/scan.go b/internal/image/scan.go index 9bfc8ae02d9..ccbd398b578 100644 --- a/internal/image/scan.go +++ b/internal/image/scan.go @@ -1,14 +1,21 @@ package image import ( + "cmp" "errors" "fmt" "io/fs" "log" + "path" + "slices" + "strings" + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scanner/internal/lockfilescalibr" "github.com/google/osv-scanner/pkg/lockfile" "github.com/google/osv-scanner/pkg/models" "github.com/google/osv-scanner/pkg/reporter" + "golang.org/x/exp/maps" ) // ScanImage scans an exported docker image .tar file @@ -22,33 +29,105 @@ func ScanImage(r reporter.Reporter, imagePath string) (ScanResults, error) { allFiles := img.LastLayer().AllFiles() - scannedLockfiles := ScanResults{ + scanResults := ScanResults{ ImagePath: imagePath, } + + inventories := []*extractor.Inventory{} + for _, file := range allFiles { if file.fileType != RegularFile { continue } - parsedLockfile, err := extractArtifactDeps(file.virtualPath, img.LastLayer()) + + // TODO: Currently osv-scalibr does not correctly annotate OS packages + // causing artifact extractors to double extract elements here. + // So let's skip all these directories for now. + // See (b/364536788) + // + // https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard + // > Secondary hierarchy for read-only user data; contains the majority of (multi-)user utilities and applications. + // > Should be shareable and read-only. 
+ // + if strings.HasPrefix(file.virtualPath, "/usr/") { + continue + } + + extractedInventories, err := extractArtifactDeps(file.virtualPath, img.LastLayer()) if err != nil { - if !errors.Is(err, lockfile.ErrExtractorNotFound) { + if !errors.Is(err, lockfilescalibr.ErrExtractorNotFound) { r.Errorf("Attempted to extract lockfile but failed: %s - %v\n", file.virtualPath, err) } continue } + inventories = append(inventories, extractedInventories...) + } + + // TODO: Remove the lockfile.Lockfile conversion + // Temporarily convert back to lockfile.Lockfiles to minimize snapshot changes + // This is done to verify the scanning behavior have not changed with this refactor + // and to minimize changes in the initial PR. + lockfiles := map[string]lockfile.Lockfile{} + for _, i := range inventories { + if len(i.Annotations) > 1 { + log.Printf("%v", i.Annotations) + } + lf, exists := lockfiles[path.Join("/", i.Locations[0])] + if !exists { + lf = lockfile.Lockfile{ + FilePath: path.Join("/", i.Locations[0]), + ParsedAs: i.Extractor.Name(), + } + } + + pkg := lockfile.PackageDetails{ + Name: i.Name, + Version: i.Version, + Ecosystem: lockfile.Ecosystem(i.Ecosystem()), + CompareAs: lockfile.Ecosystem(strings.Split(i.Ecosystem(), ":")[0]), + } + if i.SourceCode != nil { + pkg.Commit = i.SourceCode.Commit + } - scannedLockfiles.Lockfiles = append(scannedLockfiles.Lockfiles, parsedLockfile) + lf.Packages = append(lf.Packages, pkg) + + lockfiles[path.Join("/", i.Locations[0])] = lf + } + + for _, l := range lockfiles { + slices.SortFunc(l.Packages, func(a, b lockfile.PackageDetails) int { + return cmp.Or( + strings.Compare(a.Name, b.Name), + strings.Compare(a.Version, b.Version), + ) + }) } - traceOrigin(img, &scannedLockfiles) + scanResults.Lockfiles = maps.Values(lockfiles) + slices.SortFunc(scanResults.Lockfiles, func(a, b lockfile.Lockfile) int { + return strings.Compare(a.FilePath, b.FilePath) + }) + + traceOrigin(img, &scanResults) + + // TODO: Reenable this sort when 
removing lockfile.Lockfile + // Sort to have deterministic output, and to match behavior of lockfile.extractDeps + // slices.SortFunc(scanResults.Inventories, func(a, b *extractor.Inventory) int { + // // TODO: Should we consider errors here? + // aPURL, _ := a.Extractor.ToPURL(a) + // bPURL, _ := b.Extractor.ToPURL(b) + + // return strings.Compare(aPURL.ToString(), bPURL.ToString()) + // }) err = img.Cleanup() if err != nil { err = fmt.Errorf("failed to cleanup: %w", img.Cleanup()) } - return scannedLockfiles, err + return scanResults, err } // traceOrigin fills out the originLayerID for each package in ScanResults @@ -60,15 +139,30 @@ func traceOrigin(img *Image, scannedLockfiles *ScanResults) { Name string Version string Commit string - Ecosystem lockfile.Ecosystem + Ecosystem string } + // TODO: Remove this function after fully migrating to extractor.Inventory makePDKey := func(pd lockfile.PackageDetails) PDKey { return PDKey{ Name: pd.Name, Version: pd.Version, Commit: pd.Commit, - Ecosystem: pd.Ecosystem, + Ecosystem: string(pd.Ecosystem), + } + } + + makePDKey2 := func(pd *extractor.Inventory) PDKey { + var commit string + if pd.SourceCode != nil { + commit = pd.SourceCode.Commit + } + + return PDKey{ + Name: pd.Name, + Version: pd.Version, + Commit: commit, + Ecosystem: pd.Ecosystem(), } } @@ -120,12 +214,11 @@ func traceOrigin(img *Image, scannedLockfiles *ScanResults) { // Failed to parse an older version of file in image // Behave as if the file does not exist break - // log.Panicf("unimplemented! failed to parse an older version of file in image: %s@%s: %v", file.FilePath, oldFileNode.originLayer.id, err) } // For each package in the old version, check if it existed in the newer layer, if so, the origin must be this layer or earlier. 
- for _, pkg := range oldDeps.Packages { - key := makePDKey(pkg) + for _, pkg := range oldDeps { + key := makePDKey2(pkg) if val, ok := sourceLayerIdx[key]; ok && val == prevLayerIdx { sourceLayerIdx[key] = layerIdx } diff --git a/internal/lockfilescalibr/errors.go b/internal/lockfilescalibr/errors.go new file mode 100644 index 00000000000..005ee0012b7 --- /dev/null +++ b/internal/lockfilescalibr/errors.go @@ -0,0 +1,9 @@ +package lockfilescalibr + +import "errors" + +var ErrIncompatibleFileFormat = errors.New("file format is incompatible, but this is expected") +var ErrNotImplemented = errors.New("not implemented") +var ErrWrongExtractor = errors.New("this extractor did not create this inventory") +var ErrExtractorNotFound = errors.New("could not determine extractor") +var ErrNoExtractorsFound = errors.New("no extractors found to be suitable to this file") diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/extractor.go b/internal/lockfilescalibr/language/java/pomxmlnet/extractor.go new file mode 100644 index 00000000000..3a1a5f51c0c --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/extractor.go @@ -0,0 +1,188 @@ +// Package pomxmlnet extracts Maven's pom.xml format with transitive dependency resolution. +package pomxmlnet + +import ( + "context" + "fmt" + "io/fs" + "path/filepath" + + "golang.org/x/exp/maps" + + mavenresolve "deps.dev/util/resolve/maven" + mavenutil "github.com/google/osv-scanner/internal/utility/maven" + + "deps.dev/util/maven" + "deps.dev/util/resolve" + "deps.dev/util/resolve/dep" + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/osv" + "github.com/google/osv-scalibr/plugin" + "github.com/google/osv-scalibr/purl" + "github.com/google/osv-scanner/internal/resolution/client" + "github.com/google/osv-scanner/internal/resolution/datasource" +) + +// Extractor extracts osv packages from osv-scanner json output. 
+type Extractor struct { + client.DependencyClient + *datasource.MavenRegistryAPIClient +} + +// Name of the extractor. +func (e Extractor) Name() string { return "osv/pomxmlnet" } + +// Version of the extractor. +func (e Extractor) Version() int { return 0 } + +// Requirements of the extractor. +func (e Extractor) Requirements() *plugin.Capabilities { + return &plugin.Capabilities{ + Network: true, + } +} + +// FileRequired never returns true, as this is for the osv-scanner json output. +func (e Extractor) FileRequired(path string, _ fs.FileInfo) bool { + return filepath.Base(path) == "pom.xml" +} + +// Extract extracts packages from yarn.lock files passed through the scan input. +func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) { + var project maven.Project + if err := datasource.NewMavenDecoder(input.Reader).Decode(&project); err != nil { + return nil, fmt.Errorf("could not extract from %s: %w", input.Path, err) + } + // Empty JDK and ActivationOS indicates merging the default profiles. + if err := project.MergeProfiles("", maven.ActivationOS{}); err != nil { + return nil, fmt.Errorf("failed to merge profiles: %w", err) + } + for _, repo := range project.Repositories { + if err := e.MavenRegistryAPIClient.AddRegistry(string(repo.URL)); err != nil { + return nil, fmt.Errorf("failed to add registry %s: %w", repo.URL, err) + } + } + // Merging parents data by parsing local parent pom.xml or fetching from upstream. 
+ if err := mavenutil.MergeParents(ctx, e.MavenRegistryAPIClient, &project, project.Parent, 1, input.Path, true); err != nil { + return nil, fmt.Errorf("failed to merge parents: %w", err) + } + // Process the dependencies: + // - dedupe dependencies and dependency management + // - import dependency management + // - fill in missing dependency version requirement + project.ProcessDependencies(func(groupID, artifactID, version maven.String) (maven.DependencyManagement, error) { + return mavenutil.GetDependencyManagement(ctx, e.MavenRegistryAPIClient, groupID, artifactID, version) + }) + + if registries := e.MavenRegistryAPIClient.GetRegistries(); len(registries) > 0 { + clientRegs := make([]client.Registry, len(registries)) + for i, reg := range registries { + clientRegs[i] = client.Registry{URL: reg} + } + if err := e.DependencyClient.AddRegistries(clientRegs); err != nil { + return nil, err + } + } + + overrideClient := client.NewOverrideClient(e.DependencyClient) + resolver := mavenresolve.NewResolver(overrideClient) + + // Resolve the dependencies. 
+ root := resolve.Version{ + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: project.ProjectKey.Name(), + }, + VersionType: resolve.Concrete, + Version: string(project.Version), + }} + reqs := make([]resolve.RequirementVersion, len(project.Dependencies)+len(project.DependencyManagement.Dependencies)) + for i, d := range project.Dependencies { + reqs[i] = resolve.RequirementVersion{ + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: d.Name(), + }, + VersionType: resolve.Requirement, + Version: string(d.Version), + }, + Type: resolve.MavenDepType(d, ""), + } + } + for i, d := range project.DependencyManagement.Dependencies { + reqs[len(project.Dependencies)+i] = resolve.RequirementVersion{ + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: d.Name(), + }, + VersionType: resolve.Requirement, + Version: string(d.Version), + }, + Type: resolve.MavenDepType(d, mavenutil.OriginManagement), + } + } + overrideClient.AddVersion(root, reqs) + + client.PreFetch(ctx, overrideClient, reqs, input.Path) + g, err := resolver.Resolve(ctx, root.VersionKey) + if err != nil { + return nil, fmt.Errorf("failed resolving %v: %w", root, err) + } + for i, e := range g.Edges { + e.Type = dep.Type{} + g.Edges[i] = e + } + + details := map[string]*extractor.Inventory{} + for i := 1; i < len(g.Nodes); i++ { + // Ignore the first node which is the root. + node := g.Nodes[i] + depGroups := []string{} + inventory := extractor.Inventory{ + Name: node.Version.Name, + Version: node.Version.Version, + // TODO(rexpan): Add merged paths in here as well + Locations: []string{input.Path}, + } + // We are only able to know dependency groups of direct dependencies but + // not transitive dependencies because the nodes in the resolve graph does + // not have the scope information. 
+ for _, dep := range project.Dependencies { + if dep.Name() != inventory.Name { + continue + } + if dep.Scope != "" && dep.Scope != "compile" { + depGroups = append(depGroups, string(dep.Scope)) + } + } + inventory.Metadata = osv.DepGroupMetadata{ + DepGroupVals: depGroups, + } + details[inventory.Name] = &inventory + } + + return maps.Values(details), nil +} + +// ToPURL converts an inventory created by this extractor into a PURL. +func (e Extractor) ToPURL(i *extractor.Inventory) *purl.PackageURL { + return &purl.PackageURL{ + Type: purl.TypeMaven, + Name: i.Name, + Version: i.Version, + } +} + +// ToCPEs is not applicable as this extractor does not infer CPEs from the Inventory. +func (e Extractor) ToCPEs(_ *extractor.Inventory) []string { return []string{} } + +// Ecosystem returns the OSV ecosystem ('npm') of the software extracted by this extractor. +func (e Extractor) Ecosystem(_ *extractor.Inventory) string { + return "Maven" +} + +var _ filesystem.Extractor = Extractor{} diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/extractor_test.go b/internal/lockfilescalibr/language/java/pomxmlnet/extractor_test.go new file mode 100644 index 00000000000..556663be75d --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/extractor_test.go @@ -0,0 +1,366 @@ +package pomxmlnet_test + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem/osv" + "github.com/google/osv-scalibr/testing/extracttest" + "github.com/google/osv-scanner/internal/lockfilescalibr/language/java/pomxmlnet" + "github.com/google/osv-scanner/internal/resolution/clienttest" + "github.com/google/osv-scanner/internal/resolution/datasource" + "github.com/google/osv-scanner/internal/testutility" +) + +func TestMavenResolverExtractor_FileRequired(t *testing.T) { + t.Parallel() + + tests := []struct { + path string + want bool + }{ 
+ { + path: "", + want: false, + }, + { + path: "pom.xml", + want: true, + }, + { + path: "path/to/my/pom.xml", + want: true, + }, + { + path: "path/to/my/pom.xml/file", + want: false, + }, + { + path: "path/to/my/pom.xml.file", + want: false, + }, + { + path: "path.to.my.pom.xml", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + t.Parallel() + e := pomxmlnet.Extractor{} + got := e.FileRequired(tt.path, nil) + if got != tt.want { + t.Errorf("Extract() got = %v, want %v", got, tt.want) + } + }) + } +} + +func TestExtractor_Extract(t *testing.T) { + t.Parallel() + + tests := []extracttest.TestTableEntry{ + { + Name: "Not a pom file", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/not-pom.txt", + }, + WantErr: extracttest.ContainsErrStr{Str: "could not extract from"}, + }, + { + Name: "invalid xml syntax", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/invalid-syntax.xml", + }, + WantErr: extracttest.ContainsErrStr{Str: "XML syntax error"}, + }, + { + Name: "empty", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/empty.xml", + }, + WantInventory: []*extractor.Inventory{}, + }, + { + Name: "one package", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/one-package.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "org.apache.maven:maven-artifact", + Version: "1.0.0", + Locations: []string{"testdata/maven/one-package.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + }, + { + Name: "two packages", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/two-packages.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "io.netty:netty-all", + Version: "4.1.42.Final", + Locations: []string{"testdata/maven/two-packages.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.slf4j:slf4j-log4j12", + Version: "1.7.25", + Locations: 
[]string{"testdata/maven/two-packages.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + }, + { + Name: "with dependency management", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/with-dependency-management.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "io.netty:netty-all", + Version: "4.1.9", + Locations: []string{"testdata/maven/with-dependency-management.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.slf4j:slf4j-log4j12", + Version: "1.7.25", + Locations: []string{"testdata/maven/with-dependency-management.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + }, + { + Name: "interpolation", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/interpolation.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "org.mine:mypackage", + Version: "1.0.0", + Locations: []string{"testdata/maven/interpolation.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.mine:my.package", + Version: "2.3.4", + Locations: []string{"testdata/maven/interpolation.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.mine:ranged-package", + Version: "9.4.37", + Locations: []string{"testdata/maven/interpolation.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + }, + { + Name: "with scope / dep groups", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/with-scope.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "junit:junit", + Version: "4.12", + Locations: []string{"testdata/maven/with-scope.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{"runtime"}}, + }, + }, + }, + { + Name: "transitive dependencies", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/transitive.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "org.direct:alice", + 
Version: "1.0.0", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.direct:bob", + Version: "2.0.0", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.direct:chris", + Version: "3.0.0", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.transitive:chuck", + Version: "1.1.1", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.transitive:dave", + Version: "2.2.2", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.transitive:eve", + Version: "3.3.3", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.transitive:frank", + Version: "4.4.4", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + t.Parallel() + + resolutionClient := clienttest.NewMockResolutionClient(t, "testdata/universe/basic-universe.yaml") + extr := pomxmlnet.Extractor{ + DependencyClient: resolutionClient, + MavenRegistryAPIClient: &datasource.MavenRegistryAPIClient{}, + } + + scanInput := extracttest.GenerateScanInputMock(t, tt.InputConfig) + defer extracttest.CloseTestScanInput(t, scanInput) + + got, err := extr.Extract(context.Background(), &scanInput) + + if diff := cmp.Diff(tt.WantErr, err, cmpopts.EquateErrors()); diff != "" { + t.Errorf("%s.Extract(%q) error diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff) + return + } + + if diff := cmp.Diff(tt.WantInventory, got, cmpopts.SortSlices(extracttest.InventoryCmpLess)); 
diff != "" { + t.Errorf("%s.Extract(%q) diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff) + } + }) + } +} + +func TestExtractor_Extract_WithMockServer(t *testing.T) { + t.Parallel() + + tt := extracttest.TestTableEntry{ + // Name: "with parent", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/with-parent.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "org.alice:alice", + Version: "1.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.bob:bob", + Version: "2.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.chuck:chuck", + Version: "3.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.dave:dave", + Version: "4.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.eve:eve", + Version: "5.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.frank:frank", + Version: "6.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + } + + srv := testutility.NewMockHTTPServer(t) + srv.SetResponse(t, "org/upstream/parent-pom/1.0/parent-pom-1.0.pom", []byte(` + + org.upstream + parent-pom + 1.0 + pom + + + org.eve + eve + 5.0.0 + + + + `)) + srv.SetResponse(t, "org/import/import/1.2.3/import-1.2.3.pom", []byte(` + + org.import + import + 1.2.3 + pom + + + + org.frank + frank + 6.0.0 + + + + + `)) + + apiClient, err := datasource.NewMavenRegistryAPIClient(srv.URL) + if err != nil { + t.Fatalf("%v", err) + } + + resolutionClient := clienttest.NewMockResolutionClient(t, 
"testdata/universe/basic-universe.yaml") + extr := pomxmlnet.Extractor{ + DependencyClient: resolutionClient, + MavenRegistryAPIClient: apiClient, + } + + scanInput := extracttest.GenerateScanInputMock(t, tt.InputConfig) + defer extracttest.CloseTestScanInput(t, scanInput) + + got, err := extr.Extract(context.Background(), &scanInput) + + if diff := cmp.Diff(tt.WantErr, err, cmpopts.EquateErrors()); diff != "" { + t.Errorf("%s.Extract(%q) error diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff) + return + } + + if diff := cmp.Diff(tt.WantInventory, got, cmpopts.SortSlices(extracttest.InventoryCmpLess)); diff != "" { + t.Errorf("%s.Extract(%q) diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff) + } +} diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/empty.xml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/empty.xml new file mode 100644 index 00000000000..8cfeebaaa4d --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/empty.xml @@ -0,0 +1,7 @@ + + 4.0.0 + + com.mycompany.app + my-app + 1 + diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/interpolation.xml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/interpolation.xml new file mode 100644 index 00000000000..6b7f761afc6 --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/interpolation.xml @@ -0,0 +1,37 @@ + + + 4.0.0 + + io.library + my-library + 1.0-SNAPSHOT + jar + + + 1.0.0 + 2.3.4 + [9.4.35.v20201120,9.5) + + + + + org.mine + mypackage + ${mypackageVersion} + + + + org.mine + my.package + ${my.package.version} + + + + org.mine + ranged-package + ${version-range} + + + + diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/invalid-syntax.xml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/invalid-syntax.xml new file mode 100644 index 00000000000..761a32c1abb --- /dev/null +++ 
b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/invalid-syntax.xml @@ -0,0 +1,13 @@ + + + <${Id}.version>${project.version} + + + + + io.netty + netty-all + 4.1.42.Final + + + diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/not-pom.txt b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/not-pom.txt new file mode 100644 index 00000000000..f9df712bcb2 --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/not-pom.txt @@ -0,0 +1 @@ +this is not a pom.xml file! diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/one-package.xml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/one-package.xml new file mode 100644 index 00000000000..bbb1359e9d5 --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/one-package.xml @@ -0,0 +1,17 @@ + + com.mycompany.app + my-app + 1.0 + + + 3.0 + + + + + org.apache.maven + maven-artifact + 1.0.0 + + + diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/parent/pom.xml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/parent/pom.xml new file mode 100644 index 00000000000..3751df6be32 --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/parent/pom.xml @@ -0,0 +1,21 @@ + + org.local + parent-pom + 1.0 + + pom + + + org.upstream + parent-pom + 1.0 + + + + + org.dave + dave + 4.0.0 + + + diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/transitive.xml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/transitive.xml new file mode 100644 index 00000000000..52e416a0bcd --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/transitive.xml @@ -0,0 +1,33 @@ + + com.mycompany.app + my-app + 1.0 + + + + + org.transitive + frank + 4.4.4 + + + + + + + org.direct + alice + 1.0.0 + + + org.direct + bob + 2.0.0 + + + org.direct + chris + 3.0.0 + + + diff 
--git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/two-packages.xml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/two-packages.xml new file mode 100644 index 00000000000..897f648a1e4 --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/two-packages.xml @@ -0,0 +1,22 @@ + + com.mycompany.app + my-app + 1.0 + + + 3.0 + + + + + io.netty + netty-all + 4.1.42.Final + + + org.slf4j + slf4j-log4j12 + 1.7.25 + + + diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/with-dependency-management.xml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/with-dependency-management.xml new file mode 100644 index 00000000000..1928688e949 --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/with-dependency-management.xml @@ -0,0 +1,37 @@ + + com.mycompany.app + my-app + 1.0 + + + 3.0 + + + + + io.netty + netty-all + 4.1.9 + + + org.slf4j + slf4j-log4j12 + 1.7.25 + + + + + + + io.netty + netty-all + 4.1.42.Final + + + com.google.code.findbugs + jsr305 + 3.0.2 + + + + diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/with-parent.xml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/with-parent.xml new file mode 100644 index 00000000000..602b8b877f1 --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/with-parent.xml @@ -0,0 +1,54 @@ + + com.mycompany.app + my-app + 1.0 + + + org.local + parent-pom + 1.0 + ./parent/pom.xml + + + + 2.0.0 + + + + + org.alice + alice + 1.0.0 + + + org.bob + bob + ${bob.version} + + + org.chuck + chuck + + + org.frank + frank + + + + + + + org.chuck + chuck + 3.0.0 + + + org.import + import + 1.2.3 + pom + import + + + + diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/with-scope.xml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/with-scope.xml new file mode 100644 index 00000000000..688c6bb7bc2 --- 
/dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/maven/with-scope.xml @@ -0,0 +1,14 @@ + + com.mycompany.app + my-app + 1.0 + + + + junit + junit + 4.12 + runtime + + + diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/testdata/universe/basic-universe.yaml b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/universe/basic-universe.yaml new file mode 100644 index 00000000000..2bf2b32724a --- /dev/null +++ b/internal/lockfilescalibr/language/java/pomxmlnet/testdata/universe/basic-universe.yaml @@ -0,0 +1,60 @@ +system: maven +schema: | + com.google.code.findbugs:jsr305 + 3.0.2 + io.netty:netty-all + 4.1.9 + 4.1.42.Final + junit:junit + 4.12 + org.alice:alice + 1.0.0 + org.apache.maven:maven-artifact + 1.0.0 + org.bob:bob + 2.0.0 + org.chuck:chuck + 3.0.0 + org.dave:dave + 4.0.0 + org.direct:alice + 1.0.0 + org.transitive:chuck@1.1.1 + org.transitive:dave@2.2.2 + org.direct:bob + 2.0.0 + org.transitive:eve@3.3.3 + org.direct:chris + 3.0.0 + org.transitive:frank@3.3.3 + org.eve:eve + 5.0.0 + org.frank:frank + 6.0.0 + org.mine:my.package + 2.3.4 + org.mine:mypackage + 1.0.0 + org.mine:ranged-package + 9.4.35 + 9.4.36 + 9.4.37 + 9.5 + org.slf4j:slf4j-log4j12 + 1.7.25 + org.transitive:chuck + 1.1.1 + 2.2.2 + org.transitive:eve@2.2.2 + 3.3.3 + org.transitive:dave + 1.1.1 + 2.2.2 + 3.3.3 + org.transitive:eve + 1.1.1 + 2.2.2 + 3.3.3 + org.transitive:frank + 3.3.3 + 4.4.4 diff --git a/internal/lockfilescalibr/language/javascript/nodemodules/extractor.go b/internal/lockfilescalibr/language/javascript/nodemodules/extractor.go new file mode 100644 index 00000000000..a965b2fecd2 --- /dev/null +++ b/internal/lockfilescalibr/language/javascript/nodemodules/extractor.go @@ -0,0 +1,57 @@ +package nodemodules + +import ( + "context" + "io/fs" + "path/filepath" + + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + 
"github.com/google/osv-scalibr/extractor/filesystem/language/javascript/packagelockjson" + "github.com/google/osv-scalibr/plugin" + "github.com/google/osv-scalibr/purl" +) + +type Extractor struct { + actualExtractor packagelockjson.Extractor +} + +var _ filesystem.Extractor = Extractor{} + +// Name of the extractor. +func (e Extractor) Name() string { return "javascript/nodemodules" } + +// Version of the extractor. +func (e Extractor) Version() int { return 0 } + +// Requirements of the extractor. +func (e Extractor) Requirements() *plugin.Capabilities { + return &plugin.Capabilities{} +} + +// FileRequired returns true for .package-lock.json files under node_modules +func (e Extractor) FileRequired(path string, _ fs.FileInfo) bool { + return filepath.Base(filepath.Dir(path)) == "node_modules" && filepath.Base(path) == ".package-lock.json" +} + +// Extract extracts packages from yarn.lock files passed through the scan input. +func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) { + return e.actualExtractor.Extract(ctx, input) +} + +// ToPURL converts an inventory created by this extractor into a PURL. +func (e Extractor) ToPURL(i *extractor.Inventory) *purl.PackageURL { + return e.actualExtractor.ToPURL(i) +} + +// ToCPEs is not applicable as this extractor does not infer CPEs from the Inventory. +func (e Extractor) ToCPEs(i *extractor.Inventory) []string { + return e.actualExtractor.ToCPEs(i) +} + +// Ecosystem returns the OSV ecosystem ('npm') of the software extracted by this extractor. 
+func (e Extractor) Ecosystem(i *extractor.Inventory) string { + return e.actualExtractor.Ecosystem(i) +} + +var _ filesystem.Extractor = Extractor{} diff --git a/internal/lockfilescalibr/language/osv/osvscannerjson/extractor.go b/internal/lockfilescalibr/language/osv/osvscannerjson/extractor.go new file mode 100644 index 00000000000..27de9b25806 --- /dev/null +++ b/internal/lockfilescalibr/language/osv/osvscannerjson/extractor.go @@ -0,0 +1,84 @@ +// Package osvscannerjson extracts osv-scanner's json output. +package osvscannerjson + +import ( + "context" + "encoding/json" + "fmt" + "io/fs" + + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/plugin" + "github.com/google/osv-scalibr/purl" + "github.com/google/osv-scanner/pkg/models" +) + +// Extractor extracts osv packages from osv-scanner json output. +type Extractor struct{} + +// Name of the extractor. +func (e Extractor) Name() string { return "osv/osvscannerjson" } + +// Version of the extractor. +func (e Extractor) Version() int { return 0 } + +// Requirements of the extractor. +func (e Extractor) Requirements() *plugin.Capabilities { + return &plugin.Capabilities{} +} + +// FileRequired never returns true, as this is for the osv-scanner json output. +func (e Extractor) FileRequired(_ string, _ fs.FileInfo) bool { + return false +} + +// Extract extracts packages from yarn.lock files passed through the scan input. 
+func (e Extractor) Extract(_ context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) { + parsedResults := models.VulnerabilityResults{} + err := json.NewDecoder(input.Reader).Decode(&parsedResults) + + if err != nil { + return nil, fmt.Errorf("could not extract from %s: %w", input.Path, err) + } + + packages := []*extractor.Inventory{} + for _, res := range parsedResults.Results { + for _, pkg := range res.Packages { + inventory := extractor.Inventory{ + Name: pkg.Package.Name, + Version: pkg.Package.Version, + Metadata: Metadata{ + Ecosystem: pkg.Package.Ecosystem, + SourceInfo: res.Source, + }, + Locations: []string{input.Path}, + } + if pkg.Package.Commit != "" { + inventory.SourceCode = &extractor.SourceCodeIdentifier{ + Commit: pkg.Package.Commit, + } + } + + packages = append(packages, &inventory) + } + } + + return packages, nil +} + +// ToPURL converts an inventory created by this extractor into a PURL. +func (e Extractor) ToPURL(_ *extractor.Inventory) *purl.PackageURL { + // TODO: support purl conversion + return nil +} + +// ToCPEs is not applicable as this extractor does not infer CPEs from the Inventory. +func (e Extractor) ToCPEs(_ *extractor.Inventory) []string { return []string{} } + +// Ecosystem returns the OSV ecosystem ('npm') of the software extracted by this extractor. 
+func (e Extractor) Ecosystem(i *extractor.Inventory) string { + return i.Metadata.(Metadata).Ecosystem +} + +var _ filesystem.Extractor = Extractor{} diff --git a/internal/lockfilescalibr/language/osv/osvscannerjson/extractor_test.go b/internal/lockfilescalibr/language/osv/osvscannerjson/extractor_test.go new file mode 100644 index 00000000000..65289c4d4cd --- /dev/null +++ b/internal/lockfilescalibr/language/osv/osvscannerjson/extractor_test.go @@ -0,0 +1,139 @@ +package osvscannerjson_test + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/testing/extracttest" + "github.com/google/osv-scanner/internal/lockfilescalibr/language/osv/osvscannerjson" + "github.com/google/osv-scanner/pkg/models" +) + +func TestExtractor_Extract(t *testing.T) { + t.Parallel() + + tests := []extracttest.TestTableEntry{ + { + Name: "invalid yaml", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/not-json.txt", + }, + WantErr: extracttest.ContainsErrStr{Str: "could not extract from"}, + }, + { + Name: "empty", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/empty.json", + }, + WantInventory: []*extractor.Inventory{}, + }, + { + Name: "one package", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/one-package.json", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "activesupport", + Version: "7.0.7", + Locations: []string{"testdata/one-package.json"}, + Metadata: osvscannerjson.Metadata{ + Ecosystem: "RubyGems", + SourceInfo: models.SourceInfo{ + Path: "/path/to/Gemfile.lock", + Type: "lockfile", + }, + }, + }, + }, + }, + { + Name: "one package with commit", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/one-package-commit.json", + }, + WantInventory: []*extractor.Inventory{ + { + Locations: []string{"testdata/one-package-commit.json"}, + SourceCode: 
&extractor.SourceCodeIdentifier{ + Commit: "9a6bd55c9d0722cb101fe85a3b22d89e4ff4fe52", + }, + Metadata: osvscannerjson.Metadata{ + SourceInfo: models.SourceInfo{ + Path: "/path/to/Gemfile.lock", + Type: "lockfile", + }, + }, + }, + }, + }, + { + Name: "multiple packages", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/multiple-packages-with-vulns.json", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "crossbeam-utils", + Version: "0.6.6", + Locations: []string{"testdata/multiple-packages-with-vulns.json"}, + Metadata: osvscannerjson.Metadata{ + Ecosystem: "crates.io", + SourceInfo: models.SourceInfo{ + Path: "/path/to/Cargo.lock", + Type: "lockfile", + }, + }, + }, + { + Name: "memoffset", + Version: "0.5.6", + Locations: []string{"testdata/multiple-packages-with-vulns.json"}, + Metadata: osvscannerjson.Metadata{ + Ecosystem: "crates.io", + SourceInfo: models.SourceInfo{ + Path: "/path/to/Cargo.lock", + Type: "lockfile", + }, + }, + }, + { + Name: "smallvec", + Version: "1.6.0", + Locations: []string{"testdata/multiple-packages-with-vulns.json"}, + Metadata: osvscannerjson.Metadata{ + Ecosystem: "crates.io", + SourceInfo: models.SourceInfo{ + Path: "/path/to/Cargo.lock", + Type: "lockfile", + }, + }, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + t.Parallel() + extr := osvscannerjson.Extractor{} + + scanInput := extracttest.GenerateScanInputMock(t, tt.InputConfig) + defer extracttest.CloseTestScanInput(t, scanInput) + + got, err := extr.Extract(context.Background(), &scanInput) + + if diff := cmp.Diff(tt.WantErr, err, cmpopts.EquateErrors()); diff != "" { + t.Errorf("%s.Extract(%q) error diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff) + return + } + + if diff := cmp.Diff(tt.WantInventory, got, cmpopts.SortSlices(extracttest.InventoryCmpLess)); diff != "" { + t.Errorf("%s.Extract(%q) diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff) + } + }) + } +} diff --git 
a/internal/lockfilescalibr/language/osv/osvscannerjson/metadata.go b/internal/lockfilescalibr/language/osv/osvscannerjson/metadata.go new file mode 100644 index 00000000000..45c9e2c9664 --- /dev/null +++ b/internal/lockfilescalibr/language/osv/osvscannerjson/metadata.go @@ -0,0 +1,9 @@ +package osvscannerjson + +import "github.com/google/osv-scanner/pkg/models" + +// Metadata holds the metadata for osvscanner.json +type Metadata struct { + Ecosystem string + SourceInfo models.SourceInfo +} diff --git a/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/empty.json b/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/empty.json new file mode 100644 index 00000000000..a9452a2e2b0 --- /dev/null +++ b/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/empty.json @@ -0,0 +1,3 @@ +{ + "results": [] +} \ No newline at end of file diff --git a/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/multiple-packages-with-vulns.json b/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/multiple-packages-with-vulns.json new file mode 100644 index 00000000000..c8610293687 --- /dev/null +++ b/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/multiple-packages-with-vulns.json @@ -0,0 +1,504 @@ +{ + "results": [ + { + "source": { + "path": "/path/to/Cargo.lock", + "type": "lockfile" + }, + "packages": [ + { + "package": { + "name": "crossbeam-utils", + "version": "0.6.6", + "ecosystem": "crates.io" + }, + "vulnerabilities": [ + { + "modified": "2022-08-11T21:55:33Z", + "published": "2022-02-16T22:36:21Z", + "schema_version": "1.4.0", + "id": "GHSA-qc84-gqf4-9926", + "aliases": [ + "CVE-2022-23639" + ], + "summary": "crossbeam-utils Race Condition vulnerability", + "details": "### Impact\n\nThe affected version of this crate incorrectly assumed that the alignment of `{i,u}64` was always the same as `Atomic{I,U}64`. 
\n\nHowever, the alignment of `{i,u}64` on a 32-bit target can be smaller than `Atomic{I,U}64`.\n\nThis can cause the following problems:\n\n- Unaligned memory accesses\n- Data race\n\nCrates using `fetch_*` methods with `AtomicCell\u003c{i,u}64\u003e` are affected by this issue.\n\n32-bit targets without `Atomic{I,U}64` and 64-bit targets are not affected by this issue.\n32-bit targets with `Atomic{I,U}64` and `{i,u}64` have the same alignment are also not affected by this issue.\n\nThe following is a complete list of the builtin targets that may be affected. (last update: nightly-2022-02-11)\n\n- armv7-apple-ios (tier 3)\n- armv7s-apple-ios (tier 3)\n- i386-apple-ios (tier 3)\n- i586-unknown-linux-gnu\n- i586-unknown-linux-musl\n- i686-apple-darwin (tier 3)\n- i686-linux-android\n- i686-unknown-freebsd\n- i686-unknown-haiku (tier 3)\n- i686-unknown-linux-gnu\n- i686-unknown-linux-musl\n- i686-unknown-netbsd (tier 3)\n- i686-unknown-openbsd (tier 3)\n- i686-wrs-vxworks (tier 3)\n\n([script to get list](https://gist.github.com/taiki-e/3c7891e8c5f5e0cbcb44d7396aabfe10))\n\n### Patches\n\nThis has been fixed in crossbeam-utils 0.8.7.\n\nAffected 0.8.x releases have been yanked.\n\n### References\n\nhttps://github.com/crossbeam-rs/crossbeam/pull/781 \n\n### License\n\nThis advisory is in the public domain.", + "affected": [ + { + "package": { + "ecosystem": "crates.io", + "name": "crossbeam-utils", + "purl": "pkg:cargo/crossbeam-utils" + }, + "ranges": [ + { + "type": "SEMVER", + "events": [ + { + "introduced": "0" + }, + { + "fixed": "0.8.7" + } + ] + } + ], + "database_specific": { + "source": "https://github.com/github/advisory-database/blob/main/advisories/github-reviewed/2022/02/GHSA-qc84-gqf4-9926/GHSA-qc84-gqf4-9926.json" + } + } + ], + "severity": [ + { + "type": "CVSS_V3", + "score": "CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:H/I:H/A:H" + } + ], + "references": [ + { + "type": "WEB", + "url": 
"https://github.com/crossbeam-rs/crossbeam/security/advisories/GHSA-qc84-gqf4-9926" + }, + { + "type": "ADVISORY", + "url": "https://nvd.nist.gov/vuln/detail/CVE-2022-23639" + }, + { + "type": "WEB", + "url": "https://github.com/crossbeam-rs/crossbeam/pull/781" + }, + { + "type": "PACKAGE", + "url": "https://github.com/crossbeam-rs/crossbeam" + }, + { + "type": "WEB", + "url": "https://github.com/crossbeam-rs/crossbeam/releases/tag/crossbeam-utils-0.8.7" + }, + { + "type": "WEB", + "url": "https://rustsec.org/advisories/RUSTSEC-2022-0041.html" + } + ], + "database_specific": { + "cwe_ids": [ + "CWE-362" + ], + "github_reviewed": true, + "github_reviewed_at": "2022-02-16T22:36:21Z", + "nvd_published_at": "2022-02-15T19:15:00Z", + "severity": "HIGH" + } + }, + { + "modified": "2022-08-04T13:56:30Z", + "published": "2022-02-05T12:00:00Z", + "schema_version": "1.4.0", + "id": "RUSTSEC-2022-0041", + "aliases": [ + "GHSA-qc84-gqf4-9926", + "CVE-2022-23639" + ], + "summary": "Unsoundness of AtomicCell\u003c*64\u003e arithmetics on 32-bit targets that support Atomic*64", + "details": "## Impact\n\nAffected versions of this crate incorrectly assumed that the alignment of {i,u}64 was always the same as Atomic{I,U}64.\n\nHowever, the alignment of {i,u}64 on a 32-bit target can be smaller than Atomic{I,U}64.\n\nThis can cause the following problems:\n\n- Unaligned memory accesses\n- Data race\n\nCrates using fetch_* methods with AtomicCell\u003c{i,u}64\u003e are affected by this issue.\n\n32-bit targets without Atomic{I,U}64 and 64-bit targets are not affected by this issue.\n\n32-bit targets with Atomic{I,U}64 and {i,u}64 have the same alignment are also not affected by this issue.\n\nThe following is a complete list of the builtin targets that may be affected. 
(last update: nightly-2022-02-11)\n\n- armv7-apple-ios (tier 3)\n- armv7s-apple-ios (tier 3)\n- i386-apple-ios (tier 3)\n- i586-unknown-linux-gnu\n- i586-unknown-linux-musl\n- i686-apple-darwin (tier 3)\n- i686-linux-android\n- i686-unknown-freebsd\n- i686-unknown-haiku (tier 3)\n- i686-unknown-linux-gnu\n- i686-unknown-linux-musl\n- i686-unknown-netbsd (tier 3)\n- i686-unknown-openbsd (tier 3)\n- i686-wrs-vxworks (tier 3)\n\n([script to get list](https://gist.github.com/taiki-e/3c7891e8c5f5e0cbcb44d7396aabfe10))\n\n## Patches\n\nThis has been fixed in crossbeam-utils 0.8.7.\n\nAffected 0.8.x releases have been yanked.\n\nThanks to @taiki-e", + "affected": [ + { + "package": { + "ecosystem": "crates.io", + "name": "crossbeam-utils", + "purl": "pkg:cargo/crossbeam-utils" + }, + "ranges": [ + { + "type": "SEMVER", + "events": [ + { + "introduced": "0.0.0-0" + }, + { + "fixed": "0.8.7" + } + ] + } + ], + "database_specific": { + "categories": [ + "memory-corruption" + ], + "cvss": null, + "informational": "unsound", + "source": "https://github.com/rustsec/advisory-db/blob/osv/crates/RUSTSEC-2022-0041.json" + }, + "ecosystem_specific": { + "affects": { + "arch": [], + "functions": [], + "os": [] + } + } + } + ], + "references": [ + { + "type": "PACKAGE", + "url": "https://crates.io/crates/crossbeam-utils" + }, + { + "type": "ADVISORY", + "url": "https://rustsec.org/advisories/RUSTSEC-2022-0041.html" + }, + { + "type": "WEB", + "url": "https://github.com/crossbeam-rs/crossbeam/pull/781" + } + ] + } + ], + "groups": [ + { + "ids": [ + "GHSA-qc84-gqf4-9926", + "RUSTSEC-2022-0041" + ] + } + ] + }, + { + "package": { + "name": "memoffset", + "version": "0.5.6", + "ecosystem": "crates.io" + }, + "vulnerabilities": [ + { + "modified": "2023-06-21T22:06:29Z", + "published": "2023-06-21T22:06:29Z", + "schema_version": "1.4.0", + "id": "GHSA-wfg4-322g-9vqv", + "summary": "memoffset allows reading uninitialized memory", + "details": "memoffset allows attempt of reading data from 
address `0` with arbitrary type. This behavior is an undefined behavior because address `0` to `std::mem::size_of\u003cT\u003e` may not have valid bit-pattern with `T`. Old implementation dereferences uninitialized memory obtained from `std::mem::align_of`. Older implementation prior to it allows using uninitialized data obtained from `std::mem::uninitialized` with arbitrary type then compute offset by taking the address of field-projection. This may also result in an undefined behavior for \"father\" that includes (directly or transitively) type that [does not allow to be uninitialized](https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html).\n\nThis flaw was corrected by using `std::ptr::addr_of` in \u003chttps://github.com/Gilnaa/memoffset/pull/50\u003e.\n", + "affected": [ + { + "package": { + "ecosystem": "crates.io", + "name": "memoffset", + "purl": "pkg:cargo/memoffset" + }, + "ranges": [ + { + "type": "SEMVER", + "events": [ + { + "introduced": "0" + }, + { + "fixed": "0.6.2" + } + ] + } + ], + "database_specific": { + "source": "https://github.com/github/advisory-database/blob/main/advisories/github-reviewed/2023/06/GHSA-wfg4-322g-9vqv/GHSA-wfg4-322g-9vqv.json" + } + } + ], + "references": [ + { + "type": "WEB", + "url": "https://github.com/Gilnaa/memoffset/issues/24" + }, + { + "type": "WEB", + "url": "https://github.com/Gilnaa/memoffset/pull/50" + }, + { + "type": "PACKAGE", + "url": "https://github.com/Gilnaa/memoffset" + }, + { + "type": "WEB", + "url": "https://rustsec.org/advisories/RUSTSEC-2023-0045.html" + } + ], + "database_specific": { + "cwe_ids": [], + "github_reviewed": true, + "github_reviewed_at": "2023-06-21T22:06:29Z", + "nvd_published_at": null, + "severity": "MODERATE" + } + }, + { + "modified": "2023-07-08T12:30:19Z", + "published": "2023-06-21T12:00:00Z", + "schema_version": "1.4.0", + "id": "RUSTSEC-2023-0045", + "aliases": [ + "GHSA-wfg4-322g-9vqv" + ], + "summary": "memoffset allows reading uninitialized 
memory", + "details": "memoffset allows attempt of reading data from address `0` with arbitrary type. This behavior is an undefined behavior because address `0` to `std::mem::size_of\u003cT\u003e` may not have valid bit-pattern with `T`. Old implementation dereferences uninitialized memory obtained from `std::mem::align_of`. Older implementation prior to it allows using uninitialized data obtained from `std::mem::uninitialized` with arbitrary type then compute offset by taking the address of field-projection. This may also result in an undefined behavior for \"father\" that includes (directly or transitively) type that [does not allow to be uninitialized](https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html).\n\nThis flaw was corrected by using `std::ptr::addr_of` in \u003chttps://github.com/Gilnaa/memoffset/pull/50\u003e.", + "affected": [ + { + "package": { + "ecosystem": "crates.io", + "name": "memoffset", + "purl": "pkg:cargo/memoffset" + }, + "ranges": [ + { + "type": "SEMVER", + "events": [ + { + "introduced": "0.0.0-0" + }, + { + "fixed": "0.6.2" + } + ] + } + ], + "database_specific": { + "categories": [ + "memory-corruption" + ], + "cvss": null, + "informational": "unsound", + "source": "https://github.com/rustsec/advisory-db/blob/osv/crates/RUSTSEC-2023-0045.json" + }, + "ecosystem_specific": { + "affects": { + "arch": [], + "functions": [ + "memoffset::offset_of" + ], + "os": [] + } + } + } + ], + "references": [ + { + "type": "PACKAGE", + "url": "https://crates.io/crates/memoffset" + }, + { + "type": "ADVISORY", + "url": "https://rustsec.org/advisories/RUSTSEC-2023-0045.html" + }, + { + "type": "REPORT", + "url": "https://github.com/Gilnaa/memoffset/issues/24" + } + ] + } + ], + "groups": [ + { + "ids": [ + "GHSA-wfg4-322g-9vqv", + "RUSTSEC-2023-0045" + ] + } + ] + }, + { + "package": { + "name": "smallvec", + "version": "1.6.0", + "ecosystem": "crates.io" + }, + "vulnerabilities": [ + { + "modified": "2023-06-13T20:51:42Z", + 
"published": "2022-05-24T17:40:21Z", + "schema_version": "1.4.0", + "id": "GHSA-43w2-9j62-hq99", + "aliases": [ + "CVE-2021-25900" + ], + "summary": "Buffer overflow in SmallVec::insert_many", + "details": "A bug in the SmallVec::insert_many method caused it to allocate a buffer that was smaller than needed. It then wrote past the end of the buffer, causing a buffer overflow and memory corruption on the heap. This bug was only triggered if the iterator passed to insert_many yielded more items than the lower bound returned from its size_hint method.\n\nThe flaw was corrected in smallvec 0.6.14 and 1.6.1, by ensuring that additional space is always reserved for each item inserted. The fix also simplified the implementation of insert_many to use less unsafe code, so it is easier to verify its correctness.", + "affected": [ + { + "package": { + "ecosystem": "crates.io", + "name": "smallvec", + "purl": "pkg:cargo/smallvec" + }, + "ranges": [ + { + "type": "SEMVER", + "events": [ + { + "introduced": "0.6.3" + }, + { + "fixed": "0.6.14" + } + ] + } + ], + "database_specific": { + "source": "https://github.com/github/advisory-database/blob/main/advisories/github-reviewed/2022/05/GHSA-43w2-9j62-hq99/GHSA-43w2-9j62-hq99.json" + }, + "ecosystem_specific": { + "affected_functions": [ + "smallvec::SmallVec::insert_many" + ] + } + }, + { + "package": { + "ecosystem": "crates.io", + "name": "smallvec", + "purl": "pkg:cargo/smallvec" + }, + "ranges": [ + { + "type": "SEMVER", + "events": [ + { + "introduced": "1.0.0" + }, + { + "fixed": "1.6.1" + } + ] + } + ], + "database_specific": { + "source": "https://github.com/github/advisory-database/blob/main/advisories/github-reviewed/2022/05/GHSA-43w2-9j62-hq99/GHSA-43w2-9j62-hq99.json" + }, + "ecosystem_specific": { + "affected_functions": [ + "smallvec::SmallVec::insert_many" + ] + } + } + ], + "severity": [ + { + "type": "CVSS_V3", + "score": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H" + } + ], + "references": [ + { + "type": 
"ADVISORY", + "url": "https://nvd.nist.gov/vuln/detail/CVE-2021-25900" + }, + { + "type": "WEB", + "url": "https://github.com/servo/rust-smallvec/issues/252" + }, + { + "type": "PACKAGE", + "url": "https://github.com/servo/rust-smallvec" + }, + { + "type": "WEB", + "url": "https://rustsec.org/advisories/RUSTSEC-2021-0003.html" + } + ], + "database_specific": { + "cwe_ids": [ + "CWE-787" + ], + "github_reviewed": true, + "github_reviewed_at": "2022-06-17T00:20:48Z", + "nvd_published_at": "2021-01-26T18:16:00Z", + "severity": "CRITICAL" + } + }, + { + "modified": "2023-06-13T13:10:24Z", + "published": "2021-01-08T12:00:00Z", + "schema_version": "1.4.0", + "id": "RUSTSEC-2021-0003", + "aliases": [ + "CVE-2021-25900", + "GHSA-43w2-9j62-hq99" + ], + "summary": "Buffer overflow in SmallVec::insert_many", + "details": "A bug in the `SmallVec::insert_many` method caused it to allocate a buffer that was smaller than needed. It then wrote past the end of the buffer, causing a buffer overflow and memory corruption on the heap.\n\nThis bug was only triggered if the iterator passed to `insert_many` yielded more items than the lower bound returned from its `size_hint` method.\n \nThe flaw was corrected in smallvec 0.6.14 and 1.6.1, by ensuring that additional space is always reserved for each item inserted. 
The fix also simplified the implementation of `insert_many` to use less unsafe code, so it is easier to verify its correctness.\n\nThank you to Yechan Bae (@Qwaz) and the Rust group at Georgia Tech’s SSLab for finding and reporting this bug.", + "affected": [ + { + "package": { + "ecosystem": "crates.io", + "name": "smallvec", + "purl": "pkg:cargo/smallvec" + }, + "ranges": [ + { + "type": "SEMVER", + "events": [ + { + "introduced": "0.6.3" + }, + { + "fixed": "0.6.14" + }, + { + "introduced": "1.0.0" + }, + { + "fixed": "1.6.1" + } + ] + } + ], + "database_specific": { + "categories": [ + "memory-corruption" + ], + "cvss": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + "informational": null, + "source": "https://github.com/rustsec/advisory-db/blob/osv/crates/RUSTSEC-2021-0003.json" + }, + "ecosystem_specific": { + "affects": { + "arch": [], + "functions": [ + "smallvec::SmallVec::insert_many" + ], + "os": [] + } + } + } + ], + "severity": [ + { + "type": "CVSS_V3", + "score": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H" + } + ], + "references": [ + { + "type": "PACKAGE", + "url": "https://crates.io/crates/smallvec" + }, + { + "type": "ADVISORY", + "url": "https://rustsec.org/advisories/RUSTSEC-2021-0003.html" + }, + { + "type": "REPORT", + "url": "https://github.com/servo/rust-smallvec/issues/252" + } + ] + } + ], + "groups": [ + { + "ids": [ + "GHSA-43w2-9j62-hq99", + "RUSTSEC-2021-0003" + ] + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/not-json.txt b/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/not-json.txt new file mode 100644 index 00000000000..319318e4d7d --- /dev/null +++ b/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/not-json.txt @@ -0,0 +1 @@ +this is not valid json! 
(I think) diff --git a/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/one-package-commit.json b/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/one-package-commit.json new file mode 100644 index 00000000000..044efa3e483 --- /dev/null +++ b/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/one-package-commit.json @@ -0,0 +1,19 @@ +{ + "results": [ + { + "source": { + "path": "/path/to/Gemfile.lock", + "type": "lockfile" + }, + "packages": [ + { + "package": { + "commit": "9a6bd55c9d0722cb101fe85a3b22d89e4ff4fe52" + }, + "vulnerabilities": [], + "groups": [] + } + ] + } + ] +} \ No newline at end of file diff --git a/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/one-package.json b/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/one-package.json new file mode 100644 index 00000000000..ceeca26123b --- /dev/null +++ b/internal/lockfilescalibr/language/osv/osvscannerjson/testdata/one-package.json @@ -0,0 +1,21 @@ +{ + "results": [ + { + "source": { + "path": "/path/to/Gemfile.lock", + "type": "lockfile" + }, + "packages": [ + { + "package": { + "name": "activesupport", + "version": "7.0.7", + "ecosystem": "RubyGems" + }, + "vulnerabilities": [], + "groups": [] + } + ] + } + ] +} \ No newline at end of file diff --git a/internal/lockfilescalibr/translation.go b/internal/lockfilescalibr/translation.go new file mode 100644 index 00000000000..3cc4c63137e --- /dev/null +++ b/internal/lockfilescalibr/translation.go @@ -0,0 +1,188 @@ +package lockfilescalibr + +import ( + "context" + "fmt" + "io/fs" + "os" + "sort" + + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/language/dart/pubspec" + "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/packageslockjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/erlang/mixlock" + 
"github.com/google/osv-scalibr/extractor/filesystem/language/golang/gomod" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/gradlelockfile" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/gradleverificationmetadataxml" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxml" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/packagelockjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/pnpmlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/yarnlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/php/composerlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/pdmlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/pipfilelock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/poetrylock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/requirements" + "github.com/google/osv-scalibr/extractor/filesystem/language/r/renvlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/ruby/gemfilelock" + "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargolock" + + scalibrfs "github.com/google/osv-scalibr/fs" +) + +var lockfileExtractors = []filesystem.Extractor{ + conanlock.Extractor{}, + packageslockjson.Extractor{}, + mixlock.Extractor{}, + pubspec.Extractor{}, + gomod.Extractor{}, + pomxml.Extractor{}, + gradlelockfile.Extractor{}, + gradleverificationmetadataxml.Extractor{}, + packagelockjson.Extractor{}, + pnpmlock.Extractor{}, + yarnlock.Extractor{}, + composerlock.Extractor{}, + pipfilelock.Extractor{}, + pdmlock.Extractor{}, + poetrylock.Extractor{}, + requirements.Extractor{}, + renvlock.Extractor{}, + gemfilelock.Extractor{}, + cargolock.Extractor{}, +} + +var lockfileExtractorMapping = map[string]string{ + "pubspec.lock": "dart/pubspec", + "pnpm-lock.yaml": "javascript/pnpmlock", + "yarn.lock": 
"javascript/yarnlock", + "package-lock.json": "javascript/packagelockjson", + "pom.xml": "java/pomxml", + "buildscript-gradle.lockfile": "java/gradlelockfile", + "gradle.lockfile": "java/gradlelockfile", + "verification-metadata.xml": "java/gradleverificationmetadataxml", + "poetry.lock": "python/poetrylock", + "Pipfile.lock": "python/Pipfilelock", + "pdm.lock": "python/pdmlock", + "requirements.txt": "python/requirements", + "Cargo.lock": "rust/Cargolock", + "composer.lock": "php/composerlock", + "mix.lock": "erlang/mixlock", + "renv.lock": "r/renvlock", + "packages.lock.json": "dotnet/packageslockjson", + "conan.lock": "cpp/conanlock", + "go.mod": "go/gomod", + "Gemfile.lock": "ruby/gemfilelock", +} + +// ExtractWithExtractor attempts to extract the file at the given path with the extractor passed in +func ExtractWithExtractor(ctx context.Context, localPath string, ext filesystem.Extractor) ([]*extractor.Inventory, error) { + info, err := os.Stat(localPath) + if err != nil { + return nil, err + } + + return extractWithExtractor(ctx, localPath, info, ext) +} + +// Extract attempts to extract the file at the given path +// +// Args: +// - localPath: the path to the lockfile +// - extractAs: the name of the lockfile format to extract as (Using OSV-Scanner V1 extractor names) +// +// Returns: +// - []*extractor.Inventory: the extracted lockfile data +// - error: any errors encountered during extraction +// +// If extractAs is not specified, then the function will attempt to +// identify the lockfile format based on the file name. +// +// If no extractors are found, then ErrNoExtractorsFound is returned. 
+func Extract(ctx context.Context, localPath string, extractAs string) ([]*extractor.Inventory, error) { + info, err := os.Stat(localPath) + if err != nil { + return nil, err + } + + if extractAs != "" { + return extractAsSpecific(ctx, extractAs, localPath, info) + } + + output := []*extractor.Inventory{} + extractorFound := false + + for _, ext := range lockfileExtractors { + if ext.FileRequired(localPath, info) { + extractorFound = true + + inv, err := extractWithExtractor(ctx, localPath, info, ext) + if err != nil { + return nil, err + } + + output = append(output, inv...) + } + } + + if !extractorFound { + return nil, ErrNoExtractorsFound + } + + sort.Slice(output, func(i, j int) bool { + if output[i].Name == output[j].Name { + return output[i].Version < output[j].Version + } + + return output[i].Name < output[j].Name + }) + + return output, nil +} + +// Use the extractor specified by extractAs string key +func extractAsSpecific(ctx context.Context, extractAs string, localPath string, info fs.FileInfo) ([]*extractor.Inventory, error) { + for _, ext := range lockfileExtractors { + if lockfileExtractorMapping[extractAs] == ext.Name() { + return extractWithExtractor(ctx, localPath, info, ext) + } + } + + return nil, fmt.Errorf("%w, requested %s", ErrExtractorNotFound, extractAs) +} + +func extractWithExtractor(ctx context.Context, localPath string, info fs.FileInfo, ext filesystem.Extractor) ([]*extractor.Inventory, error) { + si, err := createScanInput(localPath, info) + if err != nil { + return nil, err + } + + inv, err := ext.Extract(ctx, si) + if err != nil { + return nil, fmt.Errorf("(extracting as %s) %w", ext.Name(), err) + } + + for i := range inv { + inv[i].Extractor = ext + } + + return inv, nil +} + +func createScanInput(path string, fileInfo fs.FileInfo) (*filesystem.ScanInput, error) { + reader, err := os.Open(path) + if err != nil { + return nil, err + } + + si := filesystem.ScanInput{ + FS: os.DirFS("/").(scalibrfs.FS), + Path: path, + Root: "/", + 
Reader: reader, + Info: fileInfo, + } + + return &si, nil +} diff --git a/internal/lockfilescalibr/translation_test.go b/internal/lockfilescalibr/translation_test.go new file mode 100644 index 00000000000..14c5f72e1d9 --- /dev/null +++ b/internal/lockfilescalibr/translation_test.go @@ -0,0 +1,23 @@ +package lockfilescalibr + +import ( + "testing" +) + +func TestLockfileScalibrMappingExists(t *testing.T) { + t.Parallel() + + for _, target := range lockfileExtractorMapping { + found := false + for _, ext := range lockfileExtractors { + if target == ext.Name() { + found = true + break + } + } + + if !found { + t.Errorf("Extractor %v not found.", target) + } + } +} diff --git a/internal/remediation/fixtures/santatracker/osv-scanner.toml b/internal/remediation/fixtures/santatracker/osv-scanner.toml index b399bb4c284..db94704b26f 100644 --- a/internal/remediation/fixtures/santatracker/osv-scanner.toml +++ b/internal/remediation/fixtures/santatracker/osv-scanner.toml @@ -1,191 +1,4 @@ [[PackageOverrides]] -name = "@babel/traverse" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "@grpc/grpc-js" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "acorn" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "ajv" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "ansi-regex" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "braces" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "browserslist" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = 
"dat.gui" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "get-func-name" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "glob-parent" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "google-closure-library" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "html-minifier" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "json-schema" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "json5" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "lodash" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "minimatch" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "minimist" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "node-fetch" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "node-forge " -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "node-forge" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "path-parse" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "pathval" -ecosystem = "npm" 
-ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "postcss" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "protobufjs" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "qs" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "request" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "semver" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "terser" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "tough-cookie" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "ws" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "y18n" -ecosystem = "npm" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "yargs-parser" ecosystem = "npm" ignore = true reason = "This is an intentionally vulnerable test project" diff --git a/internal/remediation/fixtures/zeppelin-server/osv-scanner.toml b/internal/remediation/fixtures/zeppelin-server/osv-scanner.toml index 250f7b75308..d84c70b89ec 100644 --- a/internal/remediation/fixtures/zeppelin-server/osv-scanner.toml +++ b/internal/remediation/fixtures/zeppelin-server/osv-scanner.toml @@ -1,143 +1,4 @@ [[PackageOverrides]] -name = "com.fasterxml.jackson.core:jackson-databind" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = 
"com.google.guava:guava" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "com.jcraft:jsch" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "com.nimbusds:nimbus-jose-jwt" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "io.atomix:atomix" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "io.netty:netty-codec" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "io.netty:netty-handler" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.apache.commons:commons-compress" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.apache.commons:commons-configuration2" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.apache.directory.api:api-ldap-model" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.apache.mina:mina-core" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.apache.pdfbox:pdfbox" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.apache.shiro:shiro-core" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.apache.shiro:shiro-web" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally 
vulnerable test project" - -[[PackageOverrides]] -name = "org.apache.thrift:libthrift" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.bouncycastle:bcprov-jdk15on" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.codehaus.jackson:jackson-mapper-asl" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.eclipse.jgit:org.eclipse.jgit" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.glassfish.jersey.core:jersey-common" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "com.google.code.gson:gson" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "commons-collections:commons-collections" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.apache.httpcomponents:httpclient" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.eclipse.jetty:jetty-webapp" -ecosystem = "Maven" -ignore = true -reason = "This is an intentionally vulnerable test project" - -[[PackageOverrides]] -name = "org.quartz-scheduler:quartz" ecosystem = "Maven" ignore = true reason = "This is an intentionally vulnerable test project" diff --git a/internal/sbom/cyclonedx.go b/internal/sbom/cyclonedx.go deleted file mode 100644 index 2fbc09b24be..00000000000 --- a/internal/sbom/cyclonedx.go +++ /dev/null @@ -1,118 +0,0 @@ -package sbom - -import ( - "errors" - "fmt" - "io" - "path/filepath" - "strings" - - "github.com/CycloneDX/cyclonedx-go" -) - -type 
CycloneDX struct{} - -type cyclonedxType struct { - name string - bomType cyclonedx.BOMFileFormat -} - -var ( - cycloneDXTypes = []cyclonedxType{ - { - name: "json", - bomType: cyclonedx.BOMFileFormatJSON, - }, - { - name: "xml", - bomType: cyclonedx.BOMFileFormatXML, - }, - } -) - -func (c *CycloneDX) Name() string { - return "CycloneDX" -} - -func (c *CycloneDX) MatchesRecognizedFileNames(path string) bool { - // See https://cyclonedx.org/specification/overview/#recognized-file-patterns - expectedGlobs := []string{ - "bom.xml", - "bom.json", - "*.cdx.json", - "*.cdx.xml", - } - filename := filepath.Base(path) - for _, v := range expectedGlobs { - matched, err := filepath.Match(v, filename) - if err != nil { - // Just panic since the only error is invalid glob pattern - panic("Glob pattern is invalid: " + err.Error()) - } - - if matched { - return true - } - } - - return false -} - -func (c *CycloneDX) enumerateComponents(components []cyclonedx.Component, callback func(Identifier) error) error { - for _, component := range components { - if component.PackageURL != "" { - err := callback(Identifier{ - PURL: component.PackageURL, - }) - if err != nil { - return err - } - } - // Components can have components, so enumerate them recursively. - if component.Components != nil { - err := c.enumerateComponents(*component.Components, callback) - if err != nil { - return err - } - } - } - - return nil -} - -func (c *CycloneDX) enumeratePackages(bom *cyclonedx.BOM, callback func(Identifier) error) error { - if bom.Components == nil { - return nil - } - - return c.enumerateComponents(*bom.Components, callback) -} - -func (c *CycloneDX) GetPackages(r io.ReadSeeker, callback func(Identifier) error) error { - //nolint:prealloc // Not sure how many there will be in advance. 
- var errs []error - var bom cyclonedx.BOM - - for _, formatType := range cycloneDXTypes { - _, err := r.Seek(0, io.SeekStart) - if err != nil { - return fmt.Errorf("failed to seek to start of file: %w", err) - } - decoder := cyclonedx.NewBOMDecoder(r, formatType.bomType) - err = decoder.Decode(&bom) - if err == nil { - if bom.BOMFormat == "CycloneDX" || strings.HasPrefix(bom.XMLNS, "http://cyclonedx.org/schema/bom") { - return c.enumeratePackages(&bom, callback) - } - - err = errors.New("invalid BOMFormat") - } - - errs = append(errs, fmt.Errorf("failed trying %s: %w", formatType.name, err)) - } - - return InvalidFormatError{ - Msg: "failed to parse CycloneDX", - Errs: errs, - } -} diff --git a/internal/sbom/cyclonedx_test.go b/internal/sbom/cyclonedx_test.go deleted file mode 100644 index fe0f34bcd9c..00000000000 --- a/internal/sbom/cyclonedx_test.go +++ /dev/null @@ -1,60 +0,0 @@ -package sbom_test - -import ( - "os" - "path/filepath" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/osv-scanner/internal/sbom" -) - -func runCycloneGetPackages(t *testing.T, bomFile string, want []sbom.Identifier) { - t.Helper() - - f, err := os.Open(filepath.Join("fixtures", bomFile)) - if err != nil { - t.Fatalf("Failed to read fixture file: %v", err) - } - defer f.Close() - - got := []sbom.Identifier{} - callback := func(id sbom.Identifier) error { - got = append(got, id) - return nil - } - - cdx := &sbom.CycloneDX{} - err = cdx.GetPackages(f, callback) - if err != nil { - t.Errorf("GetPackages returned an error: %v", err) - } - - if diff := cmp.Diff(want, got); diff != "" { - t.Errorf("GetPackages() returned an unexpected result (-want +got):\n%s", diff) - } -} - -func TestCycloneDXGetPackages(t *testing.T) { - t.Parallel() - tests := []struct { - bomFile string - identifiers []sbom.Identifier - }{ - { - bomFile: "cyclonedx.json", - identifiers: []sbom.Identifier{ - {PURL: "pkg:maven/org.hdrhistogram/HdrHistogram@2.1.12"}, - {PURL: 
"pkg:maven/org.apache.logging.log4j/log4j-core@2.16.0"}, - }, - }, - { - bomFile: "cyclonedx-empty.json", - identifiers: []sbom.Identifier{}, - }, - } - - for _, tt := range tests { - runCycloneGetPackages(t, tt.bomFile, tt.identifiers) - } -} diff --git a/internal/sbom/fixtures/cyclonedx-empty.json b/internal/sbom/fixtures/cyclonedx-empty.json deleted file mode 100644 index 19516067d30..00000000000 --- a/internal/sbom/fixtures/cyclonedx-empty.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "bomFormat": "CycloneDX", - "specVersion": "1.4", - "version": 1 -} diff --git a/internal/sbom/fixtures/cyclonedx.json b/internal/sbom/fixtures/cyclonedx.json deleted file mode 100644 index d9421e66c3c..00000000000 --- a/internal/sbom/fixtures/cyclonedx.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "bomFormat": "CycloneDX", - "specVersion": "1.4", - "version": 1, - "components": [ - { - "type": "container", - "name": "/target.tar", - "components": [ - { - "type": "library", - "name": "HdrHistogram", - "purl": "pkg:maven/org.hdrhistogram/HdrHistogram@2.1.12" - } - ] - }, - { - "type": "library", - "name": "Apache Log4j Core", - "purl": "pkg:maven/org.apache.logging.log4j/log4j-core@2.16.0" - } - ] -} diff --git a/internal/sbom/fixtures/spdx-empty.json b/internal/sbom/fixtures/spdx-empty.json deleted file mode 100644 index b91d22857dc..00000000000 --- a/internal/sbom/fixtures/spdx-empty.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "spdxVersion": "SPDX-2.2" -} \ No newline at end of file diff --git a/internal/sbom/fixtures/spdx.json b/internal/sbom/fixtures/spdx.json deleted file mode 100644 index af08dbe2662..00000000000 --- a/internal/sbom/fixtures/spdx.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "spdxVersion": "SPDX-2.2", - "packages": [ - { - "name": "HdrHistogram", - "externalRefs": [ - { - "referenceType": "purl", - "referenceLocator": "pkg:maven/org.hdrhistogram/HdrHistogram@2.1.12" - } - ] - }, - { - "name": "Apache Log4j Core", - "externalRefs": [ - { - "referenceType": "purl", - 
"referenceLocator": "pkg:maven/org.apache.logging.log4j/log4j-core@2.16.0" - } - ] - } - ] -} \ No newline at end of file diff --git a/internal/sbom/sbom.go b/internal/sbom/sbom.go deleted file mode 100644 index f6b5b810861..00000000000 --- a/internal/sbom/sbom.go +++ /dev/null @@ -1,41 +0,0 @@ -package sbom - -import ( - "fmt" - "io" - "strings" -) - -// Identifier is the identifier extracted from the SBOM. -type Identifier struct { - PURL string -} - -// Reader is an interface for all SBOM providers. -type Reader interface { - Name() string - // MatchesRecognizedFileNames checks if the file path is a standard recognized file name - MatchesRecognizedFileNames(path string) bool - GetPackages(r io.ReadSeeker, callback func(Identifier) error) error -} - -var ( - Providers = []Reader{ - &SPDX{}, - &CycloneDX{}, - } -) - -type InvalidFormatError struct { - Msg string - Errs []error -} - -func (e InvalidFormatError) Error() string { - errStrings := make([]string, 0, len(e.Errs)) - for _, e := range e.Errs { - errStrings = append(errStrings, "\t"+e.Error()) - } - - return fmt.Sprintf("%s:\n%s", e.Msg, strings.Join(errStrings, "\n")) -} diff --git a/internal/sbom/spdx.go b/internal/sbom/spdx.go deleted file mode 100644 index 53c4a4b2b8b..00000000000 --- a/internal/sbom/spdx.go +++ /dev/null @@ -1,87 +0,0 @@ -//nolint:nosnakecase -package sbom - -import ( - "fmt" - "io" - "path/filepath" - "strings" - - spdx_json "github.com/spdx/tools-golang/json" - "github.com/spdx/tools-golang/rdf" - "github.com/spdx/tools-golang/spdx/v2/v2_3" - "github.com/spdx/tools-golang/tagvalue" -) - -type SPDX struct{} -type spdxLoader func(io.Reader) (*v2_3.Document, error) - -type loader struct { - name string - loader spdxLoader -} - -var ( - spdxLoaders = []loader{ - { - name: "json", - loader: spdx_json.Read, - }, - { - name: "rdf", - loader: rdf.Read, - }, - { - name: "tv", - loader: tagvalue.Read, - }, - } -) - -func (s *SPDX) Name() string { - return "SPDX" -} - -func (s *SPDX) 
MatchesRecognizedFileNames(path string) bool { - // All spdx files should have the .spdx in the filename, even if - // it's not the extension: https://spdx.github.io/spdx-spec/v2.3/conformance/ - return strings.Contains(strings.ToLower(filepath.Base(path)), ".spdx") -} - -func (s *SPDX) enumeratePackages(doc *v2_3.Document, callback func(Identifier) error) error { - for _, p := range doc.Packages { - for _, r := range p.PackageExternalReferences { - if r.RefType == "purl" { - err := callback(Identifier{ - PURL: r.Locator, - }) - if err != nil { - return err - } - } - } - } - - return nil -} - -func (s *SPDX) GetPackages(r io.ReadSeeker, callback func(Identifier) error) error { - //nolint:prealloc // Not sure how many there will be in advance. - var errs []error - for _, loader := range spdxLoaders { - _, err := r.Seek(0, io.SeekStart) - if err != nil { - return fmt.Errorf("failed to seek to start of file: %w", err) - } - doc, err := loader.loader(r) - if err == nil { - return s.enumeratePackages(doc, callback) - } - errs = append(errs, fmt.Errorf("failed trying %s: %w", loader.name, err)) - } - - return InvalidFormatError{ - Msg: "failed to parse SPDX", - Errs: errs, - } -} diff --git a/internal/sbom/spdx_test.go b/internal/sbom/spdx_test.go deleted file mode 100644 index 820eb24eb8e..00000000000 --- a/internal/sbom/spdx_test.go +++ /dev/null @@ -1,60 +0,0 @@ -package sbom_test - -import ( - "os" - "path/filepath" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/osv-scanner/internal/sbom" -) - -func runSPDXGetPackages(t *testing.T, bomFile string, want []sbom.Identifier) { - t.Helper() - - f, err := os.Open(filepath.Join("fixtures", bomFile)) - if err != nil { - t.Fatalf("Failed to read fixture file: %v", err) - } - defer f.Close() - - got := []sbom.Identifier{} - callback := func(id sbom.Identifier) error { - got = append(got, id) - return nil - } - - spdx := &sbom.SPDX{} - err = spdx.GetPackages(f, callback) - if err != nil { - 
t.Errorf("GetPackages returned an error: %v", err) - } - - if diff := cmp.Diff(want, got); diff != "" { - t.Errorf("GetPackages() returned an unexpected result (-want +got):\n%s", diff) - } -} - -func TestSPDXGetPackages(t *testing.T) { - t.Parallel() - tests := []struct { - spdxFile string - identifiers []sbom.Identifier - }{ - { - spdxFile: "spdx.json", - identifiers: []sbom.Identifier{ - {PURL: "pkg:maven/org.hdrhistogram/HdrHistogram@2.1.12"}, - {PURL: "pkg:maven/org.apache.logging.log4j/log4j-core@2.16.0"}, - }, - }, - { - spdxFile: "spdx-empty.json", - identifiers: []sbom.Identifier{}, - }, - } - - for _, tt := range tests { - runSPDXGetPackages(t, tt.spdxFile, tt.identifiers) - } -} diff --git a/pkg/osvscanner/osvscanner.go b/pkg/osvscanner/osvscanner.go index 29112cc1e22..83827024eb4 100644 --- a/pkg/osvscanner/osvscanner.go +++ b/pkg/osvscanner/osvscanner.go @@ -2,6 +2,8 @@ package osvscanner import ( "bufio" + "cmp" + "context" "crypto/md5" //nolint:gosec "errors" "fmt" @@ -11,19 +13,28 @@ import ( "path" "path/filepath" "slices" - "sort" "strings" + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/os/apk" + "github.com/google/osv-scalibr/extractor/filesystem/os/dpkg" + scalibrosv "github.com/google/osv-scalibr/extractor/filesystem/osv" + "github.com/google/osv-scalibr/extractor/filesystem/sbom/cdx" + "github.com/google/osv-scalibr/extractor/filesystem/sbom/spdx" + "golang.org/x/exp/maps" + "github.com/google/osv-scanner/internal/config" "github.com/google/osv-scanner/internal/customgitignore" "github.com/google/osv-scanner/internal/depsdev" "github.com/google/osv-scanner/internal/image" "github.com/google/osv-scanner/internal/local" - "github.com/google/osv-scanner/internal/manifest" + "github.com/google/osv-scanner/internal/lockfilescalibr" + "github.com/google/osv-scanner/internal/lockfilescalibr/language/java/pomxmlnet" + 
"github.com/google/osv-scanner/internal/lockfilescalibr/language/osv/osvscannerjson" "github.com/google/osv-scanner/internal/output" "github.com/google/osv-scanner/internal/resolution/client" "github.com/google/osv-scanner/internal/resolution/datasource" - "github.com/google/osv-scanner/internal/sbom" "github.com/google/osv-scanner/internal/semantic" "github.com/google/osv-scanner/internal/version" "github.com/google/osv-scanner/pkg/lockfile" @@ -37,16 +48,16 @@ import ( ) type ScannerActions struct { - LockfilePaths []string - SBOMPaths []string - DirectoryPaths []string - GitCommits []string - Recursive bool - SkipGit bool - NoIgnore bool - DockerContainerNames []string - ConfigOverridePath string - CallAnalysisStates map[string]bool + LockfilePaths []string + SBOMPaths []string + DirectoryPaths []string + GitCommits []string + Recursive bool + SkipGit bool + NoIgnore bool + DockerImageName string + ConfigOverridePath string + CallAnalysisStates map[string]bool ExperimentalScannerActions } @@ -171,17 +182,19 @@ func scanDir(r reporter.Reporter, dir string, skipGit bool, recursive bool, useG } if !info.IsDir() { - if extractor, _ := lockfile.FindExtractor(path, ""); extractor != nil { - pkgs, err := scanLockfile(r, path, "", transitiveAct) - if err != nil { + pkgs, err := scanLockfile(r, path, "", transitiveAct) + if err != nil { + // If no extractors found then just continue + if !errors.Is(err, lockfilescalibr.ErrNoExtractorsFound) { r.Errorf("Attempted to scan lockfile but failed: %s\n", path) } - scannedPackages = append(scannedPackages, pkgs...) } + scannedPackages = append(scannedPackages, pkgs...) + // No need to check for error // If scan fails, it means it isn't a valid SBOM file, // so just move onto the next file - pkgs, _ := scanSBOMFile(r, path, true) + pkgs, _ = scanSBOMFile(r, path, true) scannedPackages = append(scannedPackages, pkgs...) 
} @@ -356,27 +369,29 @@ func scanImage(r reporter.Reporter, path string) ([]scannedPackage, error) { // within to `query` func scanLockfile(r reporter.Reporter, path string, parseAs string, transitiveAct TransitiveScanningActions) ([]scannedPackage, error) { var err error - var parsedLockfile lockfile.Lockfile - - f, err := lockfile.OpenLocalDepFile(path) - - if err == nil { - // special case for the APK and DPKG parsers because they have a very generic name while - // living at a specific location, so they are not included in the map of parsers - // used by lockfile.Parse to avoid false-positives when scanning projects - switch parseAs { - case "apk-installed": - parsedLockfile, err = lockfile.FromApkInstalled(path) - case "dpkg-status": - parsedLockfile, err = lockfile.FromDpkgStatus(path) - case "osv-scanner": - parsedLockfile, err = lockfile.FromOSVScannerResults(path) - default: - if !transitiveAct.Disabled && (parseAs == "pom.xml" || filepath.Base(path) == "pom.xml") { - parsedLockfile, err = extractMavenDeps(f, transitiveAct) - } else { - parsedLockfile, err = lockfile.ExtractDeps(f, parseAs) + + var inventories []*extractor.Inventory + + // special case for the APK and DPKG parsers because they have a very generic name while + // living at a specific location, so they are not included in the map of parsers + // used by lockfile.Parse to avoid false-positives when scanning projects + switch parseAs { + case "apk-installed": + inventories, err = lockfilescalibr.ExtractWithExtractor(context.Background(), path, apk.New(apk.DefaultConfig())) + case "dpkg-status": + inventories, err = lockfilescalibr.ExtractWithExtractor(context.Background(), path, dpkg.New(dpkg.DefaultConfig())) + case "osv-scanner": + inventories, err = lockfilescalibr.ExtractWithExtractor(context.Background(), path, osvscannerjson.Extractor{}) + default: + if !transitiveAct.Disabled && (parseAs == "pom.xml" || filepath.Base(path) == "pom.xml") { + ext, extErr := 
createMavenExtractor(transitiveAct) + if extErr != nil { + return nil, extErr } + + inventories, err = lockfilescalibr.ExtractWithExtractor(context.Background(), path, ext) + } else { + inventories, err = lockfilescalibr.Extract(context.Background(), path, parseAs) } } @@ -390,33 +405,57 @@ func scanLockfile(r reporter.Reporter, path string, parseAs string, transitiveAc parsedAsComment = fmt.Sprintf("as a %s ", parseAs) } + slices.SortFunc(inventories, func(i, j *extractor.Inventory) int { + return cmp.Or( + strings.Compare(i.Name, j.Name), + strings.Compare(i.Version, j.Version), + ) + }) + + pkgCount := len(inventories) + r.Infof( "Scanned %s file %sand found %d %s\n", path, parsedAsComment, - len(parsedLockfile.Packages), - output.Form(len(parsedLockfile.Packages), "package", "packages"), + pkgCount, + output.Form(pkgCount, "package", "packages"), ) - packages := make([]scannedPackage, len(parsedLockfile.Packages)) - for i, pkgDetail := range parsedLockfile.Packages { - packages[i] = scannedPackage{ - Name: pkgDetail.Name, - Version: pkgDetail.Version, - Commit: pkgDetail.Commit, - Ecosystem: pkgDetail.Ecosystem, - DepGroups: pkgDetail.DepGroups, + packages := make([]scannedPackage, 0, pkgCount) + + for _, inv := range inventories { + scannedPackage := scannedPackage{ + Name: inv.Name, + Version: inv.Version, Source: models.SourceInfo{ Path: path, Type: "lockfile", }, } + if inv.SourceCode != nil { + scannedPackage.Commit = inv.SourceCode.Commit + } + eco := inv.Ecosystem() + // TODO(rexpan): Refactor these minor patches to individual items + // TODO: Ecosystem should be pared with Enum : Suffix + if eco == "Alpine" { + eco = "Alpine:v3.20" + } + + scannedPackage.Ecosystem = lockfile.Ecosystem(eco) + + if dg, ok := inv.Metadata.(scalibrosv.DepGroups); ok { + scannedPackage.DepGroups = dg.DepGroups() + } + + packages = append(packages, scannedPackage) } return packages, nil } -func extractMavenDeps(f lockfile.DepFile, actions TransitiveScanningActions) 
(lockfile.Lockfile, error) { +func createMavenExtractor(actions TransitiveScanningActions) (*pomxmlnet.Extractor, error) { var depClient client.DependencyClient var err error if actions.NativeDataSource { @@ -425,145 +464,106 @@ func extractMavenDeps(f lockfile.DepFile, actions TransitiveScanningActions) (lo depClient, err = client.NewDepsDevClient(depsdev.DepsdevAPI) } if err != nil { - return lockfile.Lockfile{}, err + return nil, err } mavenClient, err := datasource.NewMavenRegistryAPIClient(actions.MavenRegistry) if err != nil { - return lockfile.Lockfile{}, err + return nil, err } - extractor := manifest.MavenResolverExtractor{ + extractor := pomxmlnet.Extractor{ DependencyClient: depClient, MavenRegistryAPIClient: mavenClient, } - packages, err := extractor.Extract(f) - if err != nil { - err = fmt.Errorf("failed extracting %s: %w", f.Path(), err) - } - - // Sort packages for testing convenience. - sort.Slice(packages, func(i, j int) bool { - if packages[i].Name == packages[j].Name { - return packages[i].Version < packages[j].Version - } - - return packages[i].Name < packages[j].Name - }) - return lockfile.Lockfile{ - FilePath: f.Path(), - ParsedAs: "pom.xml", - Packages: packages, - }, err + return &extractor, nil } // scanSBOMFile will load, identify, and parse the SBOM path passed in, and add the dependencies specified // within to `query` func scanSBOMFile(r reporter.Reporter, path string, fromFSScan bool) ([]scannedPackage, error) { var errs []error + + sbomExtractors := []filesystem.Extractor{ + spdx.Extractor{}, + cdx.Extractor{}, + } + + extNameMapping := map[string]string{ + spdx.Extractor{}.Name(): "SPDX", + cdx.Extractor{}.Name(): "CycloneDX", + } + packages := map[string]scannedPackage{} - for _, provider := range sbom.Providers { - if fromFSScan && !provider.MatchesRecognizedFileNames(path) { - // Skip if filename is not usually a sbom file of this format. 
- // Only do this if this is being done in a filesystem scanning context, where we need to be - // careful about spending too much time attempting to parse unrelated files. - // If this is coming from an explicit scan argument, be more relaxed here since it's common for - // filenames to not conform to expected filename standards. + + stat, err := os.Stat(path) + if err != nil { + return nil, err + } + + for _, provider := range sbomExtractors { + if !provider.FileRequired(path, stat) { continue } - // Opening file inside loop is OK, since providers is not very long, - // and it is unlikely that multiple providers accept the same file name - file, err := os.Open(path) + invs, err := lockfilescalibr.ExtractWithExtractor(context.Background(), path, provider) + if err != nil { - return nil, err + errs = append(errs, fmt.Errorf("failed to parse %s as %s SBOM file: %w", path, extNameMapping[provider.Name()], err)) + continue } - defer file.Close() - var ignoredPURLs []string - err = provider.GetPackages(file, func(id sbom.Identifier) error { - _, err := models.PURLToPackage(id.PURL) - if err != nil { - ignoredPURLs = append(ignoredPURLs, id.PURL) - //nolint:nilerr - return nil - } + if len(invs) == 0 { + errs = append(errs, + fmt.Errorf( + "scanned %s as %s SBOM, but failed to find any package URLs, this is required to scan SBOMs", + path, + extNameMapping[provider.Name()], + ), + ) + continue + } - if _, ok := packages[id.PURL]; ok { - r.Warnf("Warning, duplicate PURL found in SBOM: %s\n", id.PURL) + for _, inv := range invs { + purl := inv.Extractor.ToPURL(inv) + if purl == nil { + continue } - - packages[id.PURL] = scannedPackage{ - PURL: id.PURL, + sp := scannedPackage{ + PURL: purl.String(), Source: models.SourceInfo{ Path: path, Type: "sbom", }, } - return nil - }) - if err == nil { - // Found a parsable format. 
- if len(packages) == 0 { - // But no entries found, so maybe not the correct format - errs = append(errs, sbom.InvalidFormatError{ - Msg: "no Package URLs found", - Errs: []error{ - fmt.Errorf("scanned %s as %s SBOM, but failed to find any package URLs, this is required to scan SBOMs", path, provider.Name()), - }, - }) - - continue + if _, ok := packages[sp.PURL]; ok { + r.Warnf("Warning, duplicate PURL found in SBOM: %s\n", sp.PURL) } - r.Infof( - "Scanned %s as %s SBOM and found %d %s\n", - path, - provider.Name(), - len(packages), - output.Form(len(packages), "package", "packages"), - ) - if len(ignoredPURLs) > 0 { - r.Warnf( - "Ignored %d %s with invalid PURLs\n", - len(ignoredPURLs), - output.Form(len(ignoredPURLs), "package", "packages"), - ) - slices.Sort(ignoredPURLs) - for _, purl := range slices.Compact(ignoredPURLs) { - r.Warnf( - "Ignored invalid PURL \"%s\"\n", - purl, - ) - } - } - - sliceOfPackages := make([]scannedPackage, 0, len(packages)) - for _, pkg := range packages { - sliceOfPackages = append(sliceOfPackages, pkg) - } - - slices.SortFunc(sliceOfPackages, func(i, j scannedPackage) int { - return strings.Compare(i.PURL, j.PURL) - }) - - return sliceOfPackages, nil + packages[sp.PURL] = sp } - var formatErr sbom.InvalidFormatError - if errors.As(err, &formatErr) { - errs = append(errs, err) - continue - } + sliceOfPackages := maps.Values(packages) + slices.SortFunc(sliceOfPackages, func(i, j scannedPackage) int { + return strings.Compare(i.PURL, j.PURL) + }) - return nil, err + r.Infof( + "Scanned %s as %s SBOM and found %d %s\n", + path, + extNameMapping[provider.Name()], + len(packages), + output.Form(len(packages), "package", "packages"), + ) + + return sliceOfPackages, nil } // Don't log these errors if we're coming from an FS scan, since it can get very noisy. 
if !fromFSScan { - r.Infof("Failed to parse SBOM using all supported formats:\n") + r.Infof("Failed to parse SBOM using any supported formats:\n") for _, err := range errs { r.Infof("%s\n", err.Error()) } @@ -644,72 +644,77 @@ func createCommitQueryPackage(commit string, source string) scannedPackage { } } -func scanDebianDocker(r reporter.Reporter, dockerImageName string) ([]scannedPackage, error) { - cmd := exec.Command("docker", "run", "--rm", "--entrypoint", "/usr/bin/dpkg-query", dockerImageName, "-f", "${Package}###${Version}\\n", "-W") - stdout, err := cmd.StdoutPipe() +func runCommandLogError(r reporter.Reporter, name string, args ...string) error { + cmd := exec.Command(name, args...) + + // Get stderr for debugging when docker fails + stderr, err := cmd.StderrPipe() + if err != nil { + r.Errorf("Failed to get stderr: %s\n", err) + return err + } + + err = cmd.Start() + if err != nil { + r.Errorf("Failed to run docker command (%q): %s\n", cmd.String(), err) + return err + } + // This has to be captured before cmd.Wait() is called, as cmd.Wait() closes the stderr pipe. 
+ var stderrLines []string + scanner := bufio.NewScanner(stderr) + for scanner.Scan() { + stderrLines = append(stderrLines, scanner.Text()) + } + + err = cmd.Wait() + if err != nil { + r.Errorf("Docker command exited with code (%q): %d\nSTDERR:\n", cmd.String(), cmd.ProcessState.ExitCode()) + for _, line := range stderrLines { + r.Errorf("> %s\n", line) + } + + return errors.New("failed to run docker command") + } + + return nil +} +func scanDockerImage(r reporter.Reporter, dockerImageName string) ([]scannedPackage, error) { + tempImageFile, err := os.CreateTemp("", "docker-image-*.tar") if err != nil { - r.Errorf("Failed to get stdout: %s\n", err) + r.Errorf("Failed to create temporary file: %s\n", err) return nil, err } - stderr, err := cmd.StderrPipe() + err = tempImageFile.Close() if err != nil { - r.Errorf("Failed to get stderr: %s\n", err) return nil, err } + defer os.Remove(tempImageFile.Name()) - err = cmd.Start() + r.Infof("Pulling docker image (%q)...\n", dockerImageName) + err = runCommandLogError(r, "docker", "pull", "-q", dockerImageName) if err != nil { - r.Errorf("Failed to start docker image: %s\n", err) return nil, err } - defer func() { - var stderrlines []string - scanner := bufio.NewScanner(stderr) - for scanner.Scan() { - stderrlines = append(stderrlines, scanner.Text()) - } + r.Infof("Saving docker image (%q) to temporary file...\n", dockerImageName) + err = runCommandLogError(r, "docker", "save", "-o", tempImageFile.Name(), dockerImageName) + if err != nil { + return nil, err + } - err := cmd.Wait() - if err != nil { - r.Errorf("Docker command exited with code %d\n", cmd.ProcessState.ExitCode()) - for _, line := range stderrlines { - r.Errorf("> %s\n", line) - } - } - }() + r.Infof("Scanning image...\n") + packages, err := scanImage(r, tempImageFile.Name()) + if err != nil { + return nil, err + } - scanner := bufio.NewScanner(stdout) - var packages []scannedPackage - for scanner.Scan() { - text := scanner.Text() - text = 
strings.TrimSpace(text) - if len(text) == 0 { - continue - } - splitText := strings.Split(text, "###") - if len(splitText) != 2 { - r.Errorf("Unexpected output from Debian container: \n\n%s\n", text) - return nil, fmt.Errorf("unexpected output from Debian container: \n\n%s", text) - } - // TODO(rexpan): Get and specify exact debian release version - packages = append(packages, scannedPackage{ - Name: splitText[0], - Version: splitText[1], - Ecosystem: "Debian", - Source: models.SourceInfo{ - Path: dockerImageName, - Type: "docker", - }, - }) + // Modify the image path to be the image name, rather than the temporary file name + for i := range packages { + _, internalPath, _ := strings.Cut(packages[i].Source.Path, ":") + packages[i].Source.Path = dockerImageName + ":" + internalPath } - r.Infof( - "Scanned docker image with %d %s\n", - len(packages), - output.Form(len(packages), "package", "packages"), - ) return packages, nil } @@ -895,9 +900,11 @@ func DoScan(actions ScannerActions, r reporter.Reporter) (models.VulnerabilityRe scannedPackages = append(scannedPackages, pkgs...) } - // TODO: Deprecated - for _, container := range actions.DockerContainerNames { - pkgs, _ := scanDebianDocker(r, container) + if actions.DockerImageName != "" { + pkgs, err := scanDockerImage(r, actions.DockerImageName) + if err != nil { + return models.VulnerabilityResults{}, err + } scannedPackages = append(scannedPackages, pkgs...) } @@ -1048,7 +1055,12 @@ func filterIgnoredPackages(r reporter.Reporter, packages []scannedPackage, confi } if ignore, ignoreLine := configToUse.ShouldIgnorePackage(pkg); ignore { - pkgString := fmt.Sprintf("%s/%s/%s", p.Ecosystem, p.Name, p.Version) + var pkgString string + if p.PURL != "" { + pkgString = p.PURL + } else { + pkgString = fmt.Sprintf("%s/%s/%s", p.Ecosystem, p.Name, p.Version) + } reason := ignoreLine.Reason if reason == "" {