summaryrefslogtreecommitdiffstats
path: root/pkg/pwalkdir/pwalkdir.go
blob: 0f5d9f580d1d33ff406da58f1495e45be253964e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
//go:build go1.16
// +build go1.16

package pwalkdir

import (
	"fmt"
	"io/fs"
	"path/filepath"
	"runtime"
	"sync"
)

// Walk traverses the file tree rooted at root using filepath.WalkDir and
// invokes walkFn for the entries it finds, running several walkFn calls
// concurrently. The concurrency limit is twice runtime.NumCPU(); call
// WalkN directly to choose a different limit.
//
// Entries are handed to walkFn in no particular order.
//
// Error handling is deliberately minimal:
//
// - walkFn never receives a non-nil error argument;
//
// - an error returned by walkFn stops further processing and is
// returned to the caller of Walk;
//
// - filepath.SkipDir is not supported;
//
// - when several walkFn calls fail, only one of their errors is
// returned by Walk; the rest are silently dropped.
func Walk(root string, walkFn fs.WalkDirFunc) error {
	// Allow two in-flight callbacks per CPU.
	limit := 2 * runtime.NumCPU()
	return WalkN(root, walkFn, limit)
}

// WalkN is a wrapper for filepath.WalkDir which can call multiple walkFn
// in parallel, allowing to handle each item concurrently. A maximum of
// num walkFn will be called at any one time.
//
// The root entry is passed to walkFn last, after every other entry has
// been processed without error. If num is less than 1, an error is
// returned and nothing is walked.
//
// Please see Walk documentation for caveats of using this function.
func WalkN(root string, walkFn fs.WalkDirFunc, num int) error {
	// make sure limit is sensible
	if num < 1 {
		return fmt.Errorf("walk(%q): num must be > 0", root)
	}

	files := make(chan *walkArgs, 2*num)
	errCh := make(chan error, 1) // Get the first error, ignore others.

	// Start walking a tree asap.
	var (
		err error
		wg  sync.WaitGroup

		rootEntry *walkArgs
	)
	wg.Add(1)
	go func() {
		defer wg.Done()
		// The walker is the only sender, so it closes the queue exactly
		// once, no matter how WalkDir terminates.
		defer close(files)

		err = filepath.WalkDir(root, func(p string, entry fs.DirEntry, walkErr error) error {
			if walkErr != nil {
				return walkErr
			}
			if rootEntry == nil {
				// WalkDir always visits root first. Comparing path
				// lengths instead would misidentify children of a
				// non-clean root (e.g. "./a" vs "a/x").
				// Root entry is processed separately below.
				rootEntry = &walkArgs{path: p, entry: entry}
				return nil
			}
			// Add a file to the queue unless a callback sent an error.
			select {
			case e := <-errCh:
				return e
			default:
				files <- &walkArgs{path: p, entry: entry}
				return nil
			}
		})
	}()

	wg.Add(num)
	for i := 0; i < num; i++ {
		go func() {
			defer wg.Done()
			for file := range files {
				if e := walkFn(file.path, file.entry, nil); e != nil {
					select {
					case errCh <- e: // sent ok
					default: // buffer full
					}
				}
			}
		}()
	}

	wg.Wait()

	if err == nil {
		// A callback may have failed after the walker had already
		// queued its last entry; pick that error up here so it is
		// not silently lost.
		select {
		case err = <-errCh:
		default:
		}
	}

	if err == nil {
		err = walkFn(rootEntry.path, rootEntry.entry, nil)
	}

	return err
}

// walkArgs is one queued unit of work: the path and directory entry
// that will be handed to walkFn for a single item found by the walk.
type walkArgs struct {
	path  string      // path of the item, as reported by filepath.WalkDir
	entry fs.DirEntry // directory entry describing the item
}