Skip to content

Commit adba6a6

Browse files
committed
rpm: add function to determine if paths are RPM
Allow language indexers to check if the filepaths they are interested in have been installed via RPM. Signed-off-by: crozzy <[email protected]>
1 parent bea8a6c commit adba6a6

File tree

7 files changed

+444
-196
lines changed

7 files changed

+444
-196
lines changed

file.go

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ type FileKind string
55

66
const (
77
FileKindWhiteout = FileKind("whiteout")
8+
FileKindRPM = FileKind("rpm")
89
)
910

1011
// File represents interesting files that are found in the layer.

gobin/gobin.go

+15-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828

2929
"github.com/quay/claircore"
3030
"github.com/quay/claircore/indexer"
31+
"github.com/quay/claircore/rpm"
3132
)
3233

3334
// Detector detects go binaries and reports the packages used to build them.
@@ -86,7 +87,8 @@ func (Detector) Scan(ctx context.Context, l *claircore.Layer) ([]*claircore.Pack
8687
// Only create a single spool file per call, re-use for every binary.
8788
var spool spoolfile
8889
walk := func(p string, d fs.DirEntry, err error) error {
89-
ctx := zlog.ContextWithValues(ctx, "path", d.Name())
90+
ctx := zlog.ContextWithValues(ctx, "filename", d.Name())
91+
9092
switch {
9193
case err != nil:
9294
return err
@@ -107,6 +109,18 @@ func (Detector) Scan(ctx context.Context, l *claircore.Layer) ([]*claircore.Pack
107109
// Not executable
108110
return nil
109111
}
112+
113+
isRPM, err := rpm.FileInstalledByRPM(ctx, l, p)
114+
if err != nil {
115+
return err
116+
}
117+
if isRPM {
118+
zlog.Debug(ctx).
119+
Str("path", p).
120+
Msg("file path determined to be of RPM origin")
121+
return nil
122+
}
123+
110124
f, err := sys.Open(p)
111125
if err != nil {
112126
// TODO(crozzy): Remove log line once controller is in a

python/packagescanner.go

+15-4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/quay/claircore"
2020
"github.com/quay/claircore/indexer"
2121
"github.com/quay/claircore/pkg/pep440"
22+
"github.com/quay/claircore/rpm"
2223
)
2324

2425
var (
@@ -79,6 +80,16 @@ func (ps *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*clairco
7980
}
8081
var ret []*claircore.Package
8182
for _, n := range ms {
83+
isRPM, err := rpm.FileInstalledByRPM(ctx, layer, n)
84+
if err != nil {
85+
return nil, err
86+
}
87+
if isRPM {
88+
zlog.Debug(ctx).
89+
Str("path", n).
90+
Msg("file path determined to be of RPM origin")
91+
continue
92+
}
8293
b, err := fs.ReadFile(sys, n)
8394
if err != nil {
8495
return nil, fmt.Errorf("python: unable to read file: %w", err)
@@ -143,14 +154,14 @@ func findDeliciousEgg(ctx context.Context, sys fs.FS) (out []string, err error)
143154
// Is this layer an rpm layer?
144155
//
145156
// If so, files in the distro-managed directory can be skipped.
146-
var rpm bool
157+
var isRPM bool
147158
for _, p := range []string{
148159
"var/lib/rpm/Packages",
149160
"var/lib/rpm/rpmdb.sqlite",
150161
"var/lib/rpm/Packages.db",
151162
} {
152163
if fi, err := fs.Stat(sys, p); err == nil && fi.Mode().IsRegular() {
153-
rpm = true
164+
isRPM = true
154165
break
155166
}
156167
}
@@ -172,12 +183,12 @@ func findDeliciousEgg(ctx context.Context, sys fs.FS) (out []string, err error)
172183
switch {
173184
case err != nil:
174185
return err
175-
case (rpm || dpkg) && d.Type().IsDir():
186+
case (isRPM || dpkg) && d.Type().IsDir():
176187
// Skip one level up from the "packages" directory so the walk also
177188
// skips the standard library.
178189
var pat string
179190
switch {
180-
case rpm:
191+
case isRPM:
181192
pat = `usr/lib*/python[23].*`
182193
ev = ev.Bool("rpm_dir", true)
183194
case dpkg:

rpm/files.go

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
package rpm
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io/fs"
7+
"sync"
8+
9+
"github.com/quay/claircore"
10+
"github.com/quay/zlog"
11+
)
12+
13+
// filesCache memoizes, per layer, the set of file paths recorded in that
// layer's RPM databases.
//
// It is shared between callers, so every access to c must happen with mu held.
type filesCache struct {
	// mu guards c.
	mu *sync.Mutex
	// c maps a layer hash (in string form) to the set of RPM-owned file
	// paths. The inner map is used as a set so membership checks are cheap.
	c map[string]map[string]struct{}
}
19+
20+
var fc *filesCache
21+
22+
func init() {
23+
fc = &filesCache{
24+
c: map[string]map[string]struct{}{},
25+
mu: &sync.Mutex{},
26+
}
27+
}
28+
29+
// gc deletes the layer's entry from the map if the ctx is done, this ties the lifecycle of
30+
// the cached information to the request lifecycle to avoid excessive memory consumption.
31+
func (fc *filesCache) gc(ctx context.Context, key string) {
32+
<-ctx.Done()
33+
fc.mu.Lock()
34+
defer fc.mu.Unlock()
35+
delete(fc.c, key)
36+
}
37+
38+
// getFiles looks up RPM files that exist in the RPM database using the filesFromDB
39+
// function and memorizes the result to avoid repeated work for the same claircore.Layer.
40+
func (fc *filesCache) getFiles(ctx context.Context, layer *claircore.Layer) (map[string]struct{}, error) {
41+
if fc == nil {
42+
panic("programmer error: filesCache nil")
43+
}
44+
fc.mu.Lock()
45+
defer fc.mu.Unlock()
46+
if files, ok := fc.c[layer.Hash.String()]; ok {
47+
return files, nil
48+
}
49+
50+
sys, err := layer.FS()
51+
if err != nil {
52+
return nil, fmt.Errorf("rpm: unable to open layer: %w", err)
53+
}
54+
55+
files := map[string]struct{}{}
56+
defer func() {
57+
// Defer setting the cache so any early-outs don't have to worry.
58+
fc.c[layer.Hash.String()] = files
59+
}()
60+
found := make([]foundDB, 0)
61+
if err := fs.WalkDir(sys, ".", findDBs(ctx, &found, sys)); err != nil {
62+
return nil, fmt.Errorf("rpm: error walking fs: %w", err)
63+
}
64+
if len(found) == 0 {
65+
return nil, nil
66+
}
67+
68+
done := map[string]struct{}{}
69+
zlog.Debug(ctx).Int("count", len(found)).Msg("found possible databases")
70+
for _, db := range found {
71+
ctx := zlog.ContextWithValues(ctx, "db", db.String())
72+
zlog.Debug(ctx).Msg("examining database")
73+
if _, ok := done[db.Path]; ok {
74+
zlog.Debug(ctx).Msg("already seen, skipping")
75+
continue
76+
}
77+
done[db.Path] = struct{}{}
78+
fs, err := getDBObjects(ctx, sys, db, filesFromDB)
79+
if err != nil {
80+
return nil, fmt.Errorf("rpm: error getting native DBs: %w", err)
81+
}
82+
for _, f := range fs {
83+
files[f.Path] = struct{}{}
84+
}
85+
}
86+
go func() {
87+
fc.gc(ctx, layer.Hash.String())
88+
}()
89+
90+
return files, nil
91+
}
92+
93+
// FileInstalledByRPM takes a claircore.Layer and filepath string and returns a boolean
94+
// signifying whether that file came from an RPM package.
95+
func FileInstalledByRPM(ctx context.Context, layer *claircore.Layer, filepath string) (bool, error) {
96+
files, err := fc.getFiles(ctx, layer)
97+
if err != nil {
98+
return false, err
99+
}
100+
_, exists := files[filepath]
101+
return exists, nil
102+
}

rpm/files_test.go

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package rpm
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/quay/claircore"
8+
"github.com/quay/claircore/test"
9+
"github.com/quay/zlog"
10+
)
11+
12+
var rpmFilesTestcases = []struct {
13+
name string
14+
isRPM bool
15+
filePath string
16+
layer test.LayerRef
17+
lenFiles int
18+
}{
19+
{
20+
name: "Found",
21+
isRPM: true,
22+
filePath: "usr/lib/node_modules/npm/node_modules/safe-buffer/package.json",
23+
layer: test.LayerRef{
24+
Registry: "registry.access.redhat.com",
25+
Name: "ubi9/nodejs-18",
26+
Digest: `sha256:1ae06b64755052cef4c32979aded82a18f664c66fa7b50a6d2924afac2849c6e`,
27+
},
28+
lenFiles: 100,
29+
},
30+
{
31+
name: "Not found",
32+
isRPM: false,
33+
filePath: "usr/lib/node_modules/npm/node_modules/safe-buffer/package.jsonx",
34+
layer: test.LayerRef{
35+
Registry: "registry.access.redhat.com",
36+
Name: "ubi9/nodejs-18",
37+
Digest: `sha256:1ae06b64755052cef4c32979aded82a18f664c66fa7b50a6d2924afac2849c6e`,
38+
},
39+
lenFiles: 100,
40+
},
41+
}
42+
43+
func TestIsRPMFile(t *testing.T) {
44+
ctx := zlog.Test(context.Background(), t)
45+
a := test.NewCachedArena(t)
46+
47+
for _, tt := range rpmFilesTestcases {
48+
t.Run(tt.name, func(t *testing.T) {
49+
a.LoadLayerFromRegistry(ctx, t, tt.layer)
50+
r := a.Realizer(ctx).(*test.CachedRealizer)
51+
t.Cleanup(func() {
52+
if err := r.Close(); err != nil {
53+
t.Error(err)
54+
}
55+
})
56+
57+
realizedLayers, err := r.RealizeDescriptions(ctx, []claircore.LayerDescription{
58+
{
59+
Digest: tt.layer.Digest,
60+
URI: "http://example.com",
61+
MediaType: test.MediaType,
62+
Headers: make(map[string][]string),
63+
},
64+
})
65+
if err != nil {
66+
t.Fatal(err)
67+
}
68+
isRPM, err := FileInstalledByRPM(ctx, &realizedLayers[0], tt.filePath)
69+
if err != nil {
70+
t.Fatal(err)
71+
}
72+
if tt.isRPM != isRPM {
73+
t.Errorf("expected isRPM: %t, got isRPM: %t", tt.isRPM, isRPM)
74+
}
75+
})
76+
}
77+
}

0 commit comments

Comments
 (0)