-
Notifications
You must be signed in to change notification settings - Fork 7
/
walk.go
405 lines (348 loc) · 10.3 KB
/
walk.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
package pathlib
import (
"errors"
"fmt"
"os"
"slices"
)
// WalkOpts is the struct that defines how a walk should be performed
type WalkOpts struct {
// Depth defines how far down a directory we should recurse. A value of -1 means
// infinite depth. 0 means only the direct children of root will be returned, etc.
Depth int
// Algorithm specifies the algoritm that the Walk() function should use to
// traverse the directory.
Algorithm Algorithm
// FollowSymlinks defines whether symlinks should be dereferenced or not. If True,
// the symlink itself will never be returned to WalkFunc, but rather whatever it
// points to.
FollowSymlinks bool
// MinimumFileSize specifies the minimum size of a file for visitation.
// If negative, there is no minimum size.
MinimumFileSize int64
// MaximumFileSize specifies the maximum size of a file for visitation.
// If negative, there is no maximum size.
MaximumFileSize int64
// VisitFiles specifies that we should visit regular files during
// the walk.
VisitFiles bool
// VisitDirs specifies that we should visit directories during the walk.
VisitDirs bool
// VisitSymlinks specifies that we should visit symlinks during the walk.
VisitSymlinks bool
// SortChildren causes all children of a path to be lexigraphically sorted before
// being sent to the WalkFunc.
SortChildren bool
}
// DefaultWalkOpts returns the default WalkOpts struct used when
// walking a directory.
func DefaultWalkOpts() *WalkOpts {
return &WalkOpts{
Depth: -1,
Algorithm: AlgorithmBasic,
FollowSymlinks: false,
MinimumFileSize: -1,
MaximumFileSize: -1,
VisitFiles: true,
VisitDirs: true,
VisitSymlinks: true,
SortChildren: false,
}
}
// MeetsMinimumSize returns whether size is at least the minimum specified.
func (w *WalkOpts) MeetsMinimumSize(size int64) bool {
if w.MinimumFileSize < 0 {
return true
}
return size >= w.MinimumFileSize
}
// MeetsMaximumSize returns whether size is less than or equal to the maximum specified.
func (w *WalkOpts) MeetsMaximumSize(size int64) bool {
if w.MaximumFileSize < 0 {
return true
}
return size <= w.MaximumFileSize
}
// Algorithm represents the walk algorithm that will be performed.
type Algorithm int
const (
// AlgorithmBasic is a walk algorithm. It iterates over filesystem objects in the
// order in which they are returned by the operating system. It guarantees no
// ordering of any kind. It will recurse into subdirectories as soon as it encounters them,
// and will continue iterating the remaining children after the recursion is complete.
// It behaves as a quasi-DFS algorithm.
AlgorithmBasic Algorithm = iota
// AlgorithmDepthFirst is a walk algorithm. More specifically, it is a post-order
// depth first search whereby subdirectories are recursed into before
// visiting the children of the current directory.
// DEPRECATED: Use AlgorithmPostOrderDepthFirst
AlgorithmDepthFirst
// AlgorithmPostOrderDepthFirst is a walk algorithm that recurses into all of its children
// before visiting any of a node's elements.
AlgorithmPostOrderDepthFirst
// AlgorithmPreOrderDepthFirst is a walk algorithm. It visits all of a node's elements
// before recursing into its children.
AlgorithmPreOrderDepthFirst
)
// Walk is an object that handles walking through a directory tree
type Walk struct {
Opts *WalkOpts
root *Path
}
type WalkOptsFunc func(config *WalkOpts)
func WalkDepth(depth int) WalkOptsFunc {
return func(config *WalkOpts) {
config.Depth = depth
}
}
func WalkAlgorithm(algo Algorithm) WalkOptsFunc {
return func(config *WalkOpts) {
config.Algorithm = algo
}
}
func WalkFollowSymlinks(follow bool) WalkOptsFunc {
return func(config *WalkOpts) {
config.FollowSymlinks = follow
}
}
func WalkMinimumFileSize(size int64) WalkOptsFunc {
return func(config *WalkOpts) {
config.MinimumFileSize = size
}
}
func WalkMaximumFileSize(size int64) WalkOptsFunc {
return func(config *WalkOpts) {
config.MaximumFileSize = size
}
}
func WalkVisitFiles(value bool) WalkOptsFunc {
return func(config *WalkOpts) {
config.VisitFiles = value
}
}
func WalkVisitDirs(value bool) WalkOptsFunc {
return func(config *WalkOpts) {
config.VisitDirs = value
}
}
func WalkVisitSymlinks(value bool) WalkOptsFunc {
return func(config *WalkOpts) {
config.VisitSymlinks = value
}
}
func WalkSortChildren(value bool) WalkOptsFunc {
return func(config *WalkOpts) {
config.SortChildren = value
}
}
// NewWalk returns a new Walk struct with default values applied
func NewWalk(root *Path, opts ...WalkOptsFunc) (*Walk, error) {
config := DefaultWalkOpts()
for _, opt := range opts {
opt(config)
}
return NewWalkWithOpts(root, config)
}
// NewWalkWithOpts returns a Walk object with the given WalkOpts applied
func NewWalkWithOpts(root *Path, opts *WalkOpts) (*Walk, error) {
if root == nil {
return nil, fmt.Errorf("root path can't be nil")
}
if opts == nil {
return nil, fmt.Errorf("opts can't be nil")
}
return &Walk{
Opts: opts,
root: root,
}, nil
}
func (w *Walk) maxDepthReached(currentDepth int) bool {
if w.Opts.Depth >= 0 && currentDepth > w.Opts.Depth {
return true
}
return false
}
type dfsObjectInfo struct {
path *Path
info os.FileInfo
err error
}
func (w *Walk) walkDFS(walkFn WalkFunc, root *Path, currentDepth int) error {
if w.maxDepthReached(currentDepth) {
return nil
}
var children []*dfsObjectInfo
if err := w.iterateImmediateChildren(root, func(child *Path, info os.FileInfo, encounteredErr error) error {
// Since we are doing depth-first, we have to first recurse through all the directories,
// and save all non-directory objects so we can defer handling at a later time.
if IsDir(info.Mode()) {
if err := w.walkDFS(walkFn, child, currentDepth+1); err != nil && !errors.Is(err, ErrWalkSkipSubtree) {
return err
}
}
children = append(children, &dfsObjectInfo{
path: child,
info: info,
err: encounteredErr,
})
return nil
}); err != nil {
return err
}
// Iterate over all children after all subdirs have been recursed
for _, child := range children {
passesQuery, err := w.passesQuerySpecification(child.info)
if err != nil {
return err
}
if passesQuery {
if err := walkFn(child.path, child.info, child.err); err != nil {
return err
}
}
}
return nil
}
// iterateImmediateChildren is a function that handles discovering root's immediate children,
// and will run the algorithm function for every child. The algorithm function is essentially
// what differentiates how each walk behaves, and determines what actions to take given a
// certain child.
func (w *Walk) iterateImmediateChildren(root *Path, algorithmFunction WalkFunc) error {
children, err := root.ReadDir()
if err != nil {
return err
}
if w.Opts.SortChildren {
slices.SortFunc[[]*Path, *Path](children, func(a *Path, b *Path) int {
if a.String() < b.String() {
return -1
}
if a.String() == b.String() {
return 0
}
return 1
})
}
var info os.FileInfo
for _, child := range children {
if child.String() == root.String() {
continue
}
if w.Opts.FollowSymlinks {
info, err = child.Stat()
if err != nil {
return err
}
} else {
info, err = child.Lstat()
}
if info == nil {
if err != nil {
return err
}
return ErrInfoIsNil
}
if algoErr := algorithmFunction(child, info, err); algoErr != nil {
return algoErr
}
}
return nil
}
// passesQuerySpecification returns whether or not the object described by
// the os.FileInfo passes all of the query specifications listed in
// the walk options.
func (w *Walk) passesQuerySpecification(info os.FileInfo) (bool, error) {
if IsFile(info.Mode()) {
if !w.Opts.VisitFiles {
return false, nil
}
if !w.Opts.MeetsMinimumSize(info.Size()) ||
!w.Opts.MeetsMaximumSize(info.Size()) {
return false, nil
}
} else if IsDir(info.Mode()) && !w.Opts.VisitDirs {
return false, nil
} else if IsSymlink(info.Mode()) && !w.Opts.VisitSymlinks {
return false, nil
}
return true, nil
}
func (w *Walk) walkBasic(walkFn WalkFunc, root *Path, currentDepth int) error {
if w.maxDepthReached(currentDepth) {
return nil
}
err := w.iterateImmediateChildren(root, func(child *Path, info os.FileInfo, encounteredErr error) error {
if IsDir(info.Mode()) {
// In the case the error is ErrWalkSkipSubtree, we ignore it as we've already
// exited from the recursive call. Any other error should be bubbled up.
if err := w.walkBasic(walkFn, child, currentDepth+1); err != nil && !errors.Is(err, ErrWalkSkipSubtree) {
return err
}
}
passesQuery, err := w.passesQuerySpecification(info)
if err != nil {
return err
}
if passesQuery {
if err := walkFn(child, info, encounteredErr); err != nil {
return err
}
}
return nil
})
return err
}
func (w *Walk) walkPreOrderDFS(walkFn WalkFunc, root *Path, currentDepth int) error {
if w.maxDepthReached(currentDepth) {
return nil
}
dirs := []*Path{}
err := w.iterateImmediateChildren(root, func(child *Path, info os.FileInfo, encounteredErr error) error {
if IsDir(info.Mode()) {
dirs = append(dirs, child)
}
passesQuery, err := w.passesQuerySpecification(info)
if err != nil {
return err
}
if passesQuery {
if err := walkFn(child, info, encounteredErr); err != nil {
return err
}
}
return nil
})
if err != nil {
return err
}
for _, dir := range dirs {
if err := w.walkPreOrderDFS(walkFn, dir, currentDepth+1); err != nil && !errors.Is(err, ErrWalkSkipSubtree) {
return err
}
}
return nil
}
// WalkFunc is the function provided to the Walk function for each directory.
type WalkFunc func(path *Path, info os.FileInfo, err error) error
// Walk walks the directory using the algorithm specified in the configuration. Your WalkFunc
// may return any of the ErrWalk* errors to control various behavior of the walker. See the documentation
// of each error for more details.
func (w *Walk) Walk(walkFn WalkFunc) error {
funcs := map[Algorithm]func(walkFn WalkFunc, root *Path, currentDepth int) error{
AlgorithmBasic: w.walkBasic,
AlgorithmDepthFirst: w.walkDFS,
AlgorithmPostOrderDepthFirst: w.walkDFS,
AlgorithmPreOrderDepthFirst: w.walkPreOrderDFS,
}
algoFunc, ok := funcs[w.Opts.Algorithm]
if !ok {
return ErrInvalidAlgorithm
}
if err := algoFunc(walkFn, w.root, 0); err != nil {
if errors.Is(err, errWalkControl) {
return nil
}
return err
}
return nil
}