123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196 |
- // Copyright 2016 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // Package fastwalk provides a faster version of filepath.Walk for file system
- // scanning tools.
- package fastwalk
- import (
- "errors"
- "os"
- "path/filepath"
- "runtime"
- "sync"
- )
// Sentinel values that a walkFn passed to Walk may return.
var (
	// TraverseLink is used as a return value from WalkFuncs to indicate
	// that the symlink named in the call may be traversed.
	TraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory")

	// SkipFiles is used as a return value from WalkFuncs to indicate that
	// the callback should not be called for any other files in the current
	// directory. Child directories will still be traversed.
	SkipFiles = errors.New("fastwalk: skip remaining files in directory")
)
- // Walk is a faster implementation of filepath.Walk.
- //
- // filepath.Walk's design necessarily calls os.Lstat on each file,
- // even if the caller needs less info.
- // Many tools need only the type of each file.
- // On some platforms, this information is provided directly by the readdir
- // system call, avoiding the need to stat each file individually.
- // fastwalk_unix.go contains a fork of the syscall routines.
- //
- // See golang.org/issue/16399
- //
- // Walk walks the file tree rooted at root, calling walkFn for
- // each file or directory in the tree, including root.
- //
- // If fastWalk returns filepath.SkipDir, the directory is skipped.
- //
- // Unlike filepath.Walk:
- // * file stat calls must be done by the user.
- // The only provided metadata is the file type, which does not include
- // any permission bits.
- // * multiple goroutines stat the filesystem concurrently. The provided
- // walkFn must be safe for concurrent use.
- // * fastWalk can follow symlinks if walkFn returns the TraverseLink
- // sentinel error. It is the walkFn's responsibility to prevent
- // fastWalk from going into symlink cycles.
- func Walk(root string, walkFn func(path string, typ os.FileMode) error) error {
- // TODO(bradfitz): make numWorkers configurable? We used a
- // minimum of 4 to give the kernel more info about multiple
- // things we want, in hopes its I/O scheduling can take
- // advantage of that. Hopefully most are in cache. Maybe 4 is
- // even too low of a minimum. Profile more.
- numWorkers := 4
- if n := runtime.NumCPU(); n > numWorkers {
- numWorkers = n
- }
- // Make sure to wait for all workers to finish, otherwise
- // walkFn could still be called after returning. This Wait call
- // runs after close(e.donec) below.
- var wg sync.WaitGroup
- defer wg.Wait()
- w := &walker{
- fn: walkFn,
- enqueuec: make(chan walkItem, numWorkers), // buffered for performance
- workc: make(chan walkItem, numWorkers), // buffered for performance
- donec: make(chan struct{}),
- // buffered for correctness & not leaking goroutines:
- resc: make(chan error, numWorkers),
- }
- defer close(w.donec)
- for i := 0; i < numWorkers; i++ {
- wg.Add(1)
- go w.doWork(&wg)
- }
- todo := []walkItem{{dir: root}}
- out := 0
- for {
- workc := w.workc
- var workItem walkItem
- if len(todo) == 0 {
- workc = nil
- } else {
- workItem = todo[len(todo)-1]
- }
- select {
- case workc <- workItem:
- todo = todo[:len(todo)-1]
- out++
- case it := <-w.enqueuec:
- todo = append(todo, it)
- case err := <-w.resc:
- out--
- if err != nil {
- return err
- }
- if out == 0 && len(todo) == 0 {
- // It's safe to quit here, as long as the buffered
- // enqueue channel isn't also readable, which might
- // happen if the worker sends both another unit of
- // work and its result before the other select was
- // scheduled and both w.resc and w.enqueuec were
- // readable.
- select {
- case it := <-w.enqueuec:
- todo = append(todo, it)
- default:
- return nil
- }
- }
- }
- }
- }
- // doWork reads directories as instructed (via workc) and runs the
- // user's callback function.
- func (w *walker) doWork(wg *sync.WaitGroup) {
- defer wg.Done()
- for {
- select {
- case <-w.donec:
- return
- case it := <-w.workc:
- select {
- case <-w.donec:
- return
- case w.resc <- w.walk(it.dir, !it.callbackDone):
- }
- }
- }
- }
type (
	// walker holds the state shared between Walk and its worker
	// goroutines.
	walker struct {
		// fn is the user callback supplied to Walk.
		fn func(path string, typ os.FileMode) error

		// donec is closed on Walk's return.
		donec chan struct{}
		// workc carries directories to workers.
		workc chan walkItem
		// enqueuec carries newly discovered directories from workers.
		enqueuec chan walkItem
		// resc carries each directory's result from workers.
		resc chan error
	}

	// walkItem is one directory queued for walking.
	walkItem struct {
		dir string
		// callbackDone reports that the callback was already called
		// for dir, so it must not be called again.
		callbackDone bool
	}
)
- func (w *walker) enqueue(it walkItem) {
- select {
- case w.enqueuec <- it:
- case <-w.donec:
- }
- }
- func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error {
- joined := dirName + string(os.PathSeparator) + baseName
- if typ == os.ModeDir {
- w.enqueue(walkItem{dir: joined})
- return nil
- }
- err := w.fn(joined, typ)
- if typ == os.ModeSymlink {
- if err == TraverseLink {
- // Set callbackDone so we don't call it twice for both the
- // symlink-as-symlink and the symlink-as-directory later:
- w.enqueue(walkItem{dir: joined, callbackDone: true})
- return nil
- }
- if err == filepath.SkipDir {
- // Permit SkipDir on symlinks too.
- return nil
- }
- }
- return err
- }
- func (w *walker) walk(root string, runUserCallback bool) error {
- if runUserCallback {
- err := w.fn(root, os.ModeDir)
- if err == filepath.SkipDir {
- return nil
- }
- if err != nil {
- return err
- }
- }
- return readDir(root, w.onDirEnt)
- }
|