Skip to content

Commit 352f59f

Browse files
committed
rpm: add function to determine if paths are RPM
Allow language indexers to check if the filepaths they are interested in have been installed via RPM. Signed-off-by: crozzy <[email protected]>
1 parent bea8a6c commit 352f59f

File tree

7 files changed

+438
-196
lines changed

7 files changed

+438
-196
lines changed

file.go

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ type FileKind string
55

66
const (
77
FileKindWhiteout = FileKind("whiteout")
8+
FileKindRPM = FileKind("rpm")
89
)
910

1011
// File represents interesting files that are found in the layer.

gobin/gobin.go

+15-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828

2929
"github.com/quay/claircore"
3030
"github.com/quay/claircore/indexer"
31+
"github.com/quay/claircore/rpm"
3132
)
3233

3334
// Detector detects go binaries and reports the packages used to build them.
@@ -86,7 +87,8 @@ func (Detector) Scan(ctx context.Context, l *claircore.Layer) ([]*claircore.Pack
8687
// Only create a single spool file per call, re-use for every binary.
8788
var spool spoolfile
8889
walk := func(p string, d fs.DirEntry, err error) error {
89-
ctx := zlog.ContextWithValues(ctx, "path", d.Name())
90+
ctx := zlog.ContextWithValues(ctx, "filename", d.Name())
91+
9092
switch {
9193
case err != nil:
9294
return err
@@ -107,6 +109,18 @@ func (Detector) Scan(ctx context.Context, l *claircore.Layer) ([]*claircore.Pack
107109
// Not executable
108110
return nil
109111
}
112+
113+
isRPM, err := rpm.FileInstalledByRPM(ctx, l, p)
114+
if err != nil {
115+
return err
116+
}
117+
if isRPM {
118+
zlog.Debug(ctx).
119+
Str("path", p).
120+
Msg("file path determined to be of RPM origin")
121+
return nil
122+
}
123+
110124
f, err := sys.Open(p)
111125
if err != nil {
112126
// TODO(crozzy): Remove log line once controller is in a

python/packagescanner.go

+15-4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/quay/claircore"
2020
"github.com/quay/claircore/indexer"
2121
"github.com/quay/claircore/pkg/pep440"
22+
"github.com/quay/claircore/rpm"
2223
)
2324

2425
var (
@@ -79,6 +80,16 @@ func (ps *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*clairco
7980
}
8081
var ret []*claircore.Package
8182
for _, n := range ms {
83+
isRPM, err := rpm.FileInstalledByRPM(ctx, layer, n)
84+
if err != nil {
85+
return nil, err
86+
}
87+
if isRPM {
88+
zlog.Debug(ctx).
89+
Str("path", n).
90+
Msg("file path determined to be of RPM origin")
91+
continue
92+
}
8293
b, err := fs.ReadFile(sys, n)
8394
if err != nil {
8495
return nil, fmt.Errorf("python: unable to read file: %w", err)
@@ -143,14 +154,14 @@ func findDeliciousEgg(ctx context.Context, sys fs.FS) (out []string, err error)
143154
// Is this layer an rpm layer?
144155
//
145156
// If so, files in the distro-managed directory can be skipped.
146-
var rpm bool
157+
var isRPM bool
147158
for _, p := range []string{
148159
"var/lib/rpm/Packages",
149160
"var/lib/rpm/rpmdb.sqlite",
150161
"var/lib/rpm/Packages.db",
151162
} {
152163
if fi, err := fs.Stat(sys, p); err == nil && fi.Mode().IsRegular() {
153-
rpm = true
164+
isRPM = true
154165
break
155166
}
156167
}
@@ -172,12 +183,12 @@ func findDeliciousEgg(ctx context.Context, sys fs.FS) (out []string, err error)
172183
switch {
173184
case err != nil:
174185
return err
175-
case (rpm || dpkg) && d.Type().IsDir():
186+
case (isRPM || dpkg) && d.Type().IsDir():
176187
// Skip one level up from the "packages" directory so the walk also
177188
// skips the standard library.
178189
var pat string
179190
switch {
180-
case rpm:
191+
case isRPM:
181192
pat = `usr/lib*/python[23].*`
182193
ev = ev.Bool("rpm_dir", true)
183194
case dpkg:

rpm/files.go

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package rpm
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io/fs"
7+
"sync"
8+
9+
"github.com/quay/claircore"
10+
"github.com/quay/zlog"
11+
)
12+
13+
// filesCache memoizes, per layer, the set of file paths recorded in that
// layer's RPM databases. Entries are keyed by the string form of the layer
// hash, and the map is only meant to be touched while mu is held.
type filesCache struct {
	// c maps a layer hash string to the set of file paths owned by RPM
	// packages in that layer.
	c map[string]map[string]struct{}
	// mu serializes every read and write of c.
	mu *sync.Mutex
}
18+
19+
var fc *filesCache
20+
21+
func init() {
22+
fc = &filesCache{
23+
c: map[string]map[string]struct{}{},
24+
mu: &sync.Mutex{},
25+
}
26+
}
27+
28+
// gc deletes the entry from the map if the ctx is done, this ties the lifecycle of
29+
// the cached information to the request lifecycle to avoid excessive memory consumption.
30+
func (fc *filesCache) gc(ctx context.Context, key string) {
31+
<-ctx.Done()
32+
fc.mu.Lock()
33+
defer fc.mu.Unlock()
34+
delete(fc.c, key)
35+
}
36+
37+
// getFiles looks up RPM files that exist in the RPM database using the filesFromDB
38+
// function and memorizes the result to avoid repeated work for the same claircore.Layer.
39+
func (fc *filesCache) getFiles(ctx context.Context, layer *claircore.Layer) (map[string]struct{}, error) {
40+
if fc == nil {
41+
panic("programmer error: filesCache nil")
42+
}
43+
fc.mu.Lock()
44+
defer fc.mu.Unlock()
45+
if files, ok := fc.c[layer.Hash.String()]; ok {
46+
return files, nil
47+
}
48+
49+
sys, err := layer.FS()
50+
if err != nil {
51+
return nil, fmt.Errorf("rpm: unable to open layer: %w", err)
52+
}
53+
54+
found := make([]foundDB, 0)
55+
if err := fs.WalkDir(sys, ".", findDBs(ctx, &found, sys)); err != nil {
56+
return nil, fmt.Errorf("rpm: error walking fs: %w", err)
57+
}
58+
if len(found) == 0 {
59+
return nil, nil
60+
}
61+
62+
done := map[string]struct{}{}
63+
files := map[string]struct{}{}
64+
65+
zlog.Debug(ctx).Int("count", len(found)).Msg("found possible databases")
66+
for _, db := range found {
67+
ctx := zlog.ContextWithValues(ctx, "db", db.String())
68+
zlog.Debug(ctx).Msg("examining database")
69+
if _, ok := done[db.Path]; ok {
70+
zlog.Debug(ctx).Msg("already seen, skipping")
71+
continue
72+
}
73+
done[db.Path] = struct{}{}
74+
fs, err := getDBObjects(ctx, sys, db, filesFromDB)
75+
if err != nil {
76+
return nil, fmt.Errorf("rpm: error getting native DBs: %w", err)
77+
}
78+
for _, f := range fs {
79+
files[f.Path] = struct{}{}
80+
}
81+
}
82+
fc.c[layer.Hash.String()] = files
83+
go func() {
84+
fc.gc(ctx, layer.Hash.String())
85+
}()
86+
87+
return files, nil
88+
}
89+
90+
// FileInstalledByRPM takes a claircore.Layer and filepath string and returns a boolean
91+
// signifying whether that file came from an RPM package.
92+
func FileInstalledByRPM(ctx context.Context, layer *claircore.Layer, filepath string) (bool, error) {
93+
files, err := fc.getFiles(ctx, layer)
94+
if err != nil {
95+
return false, err
96+
}
97+
_, exists := files[filepath]
98+
return exists, nil
99+
}

rpm/files_test.go

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package rpm
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/quay/claircore"
8+
"github.com/quay/claircore/test"
9+
"github.com/quay/zlog"
10+
)
11+
12+
var rpmFilesTestcases = []struct {
13+
name string
14+
isRPM bool
15+
filePath string
16+
layer test.LayerRef
17+
}{
18+
{
19+
name: "Found",
20+
isRPM: true,
21+
filePath: "usr/lib/node_modules/npm/node_modules/safe-buffer/package.json",
22+
layer: test.LayerRef{
23+
Registry: "registry.access.redhat.com",
24+
Name: "ubi9/nodejs-18",
25+
Digest: `sha256:1ae06b64755052cef4c32979aded82a18f664c66fa7b50a6d2924afac2849c6e`,
26+
},
27+
},
28+
{
29+
name: "Not found",
30+
isRPM: false,
31+
filePath: "usr/lib/node_modules/npm/node_modules/safe-buffer/package.jsonx",
32+
layer: test.LayerRef{
33+
Registry: "registry.access.redhat.com",
34+
Name: "ubi9/nodejs-18",
35+
Digest: `sha256:1ae06b64755052cef4c32979aded82a18f664c66fa7b50a6d2924afac2849c6e`,
36+
},
37+
},
38+
}
39+
40+
func TestIsRPMFile(t *testing.T) {
41+
ctx := zlog.Test(context.Background(), t)
42+
a := test.NewCachedArena(t)
43+
44+
for _, tt := range rpmFilesTestcases {
45+
t.Run(tt.name, func(t *testing.T) {
46+
a.LoadLayerFromRegistry(ctx, t, tt.layer)
47+
r := a.Realizer(ctx).(*test.CachedRealizer)
48+
t.Cleanup(func() {
49+
if err := r.Close(); err != nil {
50+
t.Error(err)
51+
}
52+
})
53+
realizedLayers, err := r.RealizeDescriptions(ctx, []claircore.LayerDescription{
54+
{
55+
Digest: tt.layer.Digest,
56+
URI: "http://example.com",
57+
MediaType: test.MediaType,
58+
Headers: make(map[string][]string),
59+
},
60+
})
61+
if err != nil {
62+
t.Fatal(err)
63+
}
64+
isRPM, err := FileInstalledByRPM(ctx, &realizedLayers[0], tt.filePath)
65+
if err != nil {
66+
t.Fatal(err)
67+
}
68+
if tt.isRPM != isRPM {
69+
t.Errorf("expected isRPM: %t, got isRPM: %t", tt.isRPM, isRPM)
70+
}
71+
})
72+
}
73+
74+
}

0 commit comments

Comments
 (0)