fastwalk.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package fastwalk provides a faster version of filepath.Walk for file system
  5. // scanning tools.
  6. package fastwalk
  7. import (
  8. "errors"
  9. "os"
  10. "path/filepath"
  11. "runtime"
  12. "sync"
  13. )
  14. // TraverseLink is used as a return value from WalkFuncs to indicate that the
  15. // symlink named in the call may be traversed.
  16. var TraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory")
  17. // SkipFiles is a used as a return value from WalkFuncs to indicate that the
  18. // callback should not be called for any other files in the current directory.
  19. // Child directories will still be traversed.
  20. var SkipFiles = errors.New("fastwalk: skip remaining files in directory")
  21. // Walk is a faster implementation of filepath.Walk.
  22. //
  23. // filepath.Walk's design necessarily calls os.Lstat on each file,
  24. // even if the caller needs less info.
  25. // Many tools need only the type of each file.
  26. // On some platforms, this information is provided directly by the readdir
  27. // system call, avoiding the need to stat each file individually.
  28. // fastwalk_unix.go contains a fork of the syscall routines.
  29. //
  30. // See golang.org/issue/16399
  31. //
  32. // Walk walks the file tree rooted at root, calling walkFn for
  33. // each file or directory in the tree, including root.
  34. //
  35. // If fastWalk returns filepath.SkipDir, the directory is skipped.
  36. //
  37. // Unlike filepath.Walk:
  38. // * file stat calls must be done by the user.
  39. // The only provided metadata is the file type, which does not include
  40. // any permission bits.
  41. // * multiple goroutines stat the filesystem concurrently. The provided
  42. // walkFn must be safe for concurrent use.
  43. // * fastWalk can follow symlinks if walkFn returns the TraverseLink
  44. // sentinel error. It is the walkFn's responsibility to prevent
  45. // fastWalk from going into symlink cycles.
  46. func Walk(root string, walkFn func(path string, typ os.FileMode) error) error {
  47. // TODO(bradfitz): make numWorkers configurable? We used a
  48. // minimum of 4 to give the kernel more info about multiple
  49. // things we want, in hopes its I/O scheduling can take
  50. // advantage of that. Hopefully most are in cache. Maybe 4 is
  51. // even too low of a minimum. Profile more.
  52. numWorkers := 4
  53. if n := runtime.NumCPU(); n > numWorkers {
  54. numWorkers = n
  55. }
  56. // Make sure to wait for all workers to finish, otherwise
  57. // walkFn could still be called after returning. This Wait call
  58. // runs after close(e.donec) below.
  59. var wg sync.WaitGroup
  60. defer wg.Wait()
  61. w := &walker{
  62. fn: walkFn,
  63. enqueuec: make(chan walkItem, numWorkers), // buffered for performance
  64. workc: make(chan walkItem, numWorkers), // buffered for performance
  65. donec: make(chan struct{}),
  66. // buffered for correctness & not leaking goroutines:
  67. resc: make(chan error, numWorkers),
  68. }
  69. defer close(w.donec)
  70. for i := 0; i < numWorkers; i++ {
  71. wg.Add(1)
  72. go w.doWork(&wg)
  73. }
  74. todo := []walkItem{{dir: root}}
  75. out := 0
  76. for {
  77. workc := w.workc
  78. var workItem walkItem
  79. if len(todo) == 0 {
  80. workc = nil
  81. } else {
  82. workItem = todo[len(todo)-1]
  83. }
  84. select {
  85. case workc <- workItem:
  86. todo = todo[:len(todo)-1]
  87. out++
  88. case it := <-w.enqueuec:
  89. todo = append(todo, it)
  90. case err := <-w.resc:
  91. out--
  92. if err != nil {
  93. return err
  94. }
  95. if out == 0 && len(todo) == 0 {
  96. // It's safe to quit here, as long as the buffered
  97. // enqueue channel isn't also readable, which might
  98. // happen if the worker sends both another unit of
  99. // work and its result before the other select was
  100. // scheduled and both w.resc and w.enqueuec were
  101. // readable.
  102. select {
  103. case it := <-w.enqueuec:
  104. todo = append(todo, it)
  105. default:
  106. return nil
  107. }
  108. }
  109. }
  110. }
  111. }
  112. // doWork reads directories as instructed (via workc) and runs the
  113. // user's callback function.
  114. func (w *walker) doWork(wg *sync.WaitGroup) {
  115. defer wg.Done()
  116. for {
  117. select {
  118. case <-w.donec:
  119. return
  120. case it := <-w.workc:
  121. select {
  122. case <-w.donec:
  123. return
  124. case w.resc <- w.walk(it.dir, !it.callbackDone):
  125. }
  126. }
  127. }
  128. }
  129. type walker struct {
  130. fn func(path string, typ os.FileMode) error
  131. donec chan struct{} // closed on fastWalk's return
  132. workc chan walkItem // to workers
  133. enqueuec chan walkItem // from workers
  134. resc chan error // from workers
  135. }
  136. type walkItem struct {
  137. dir string
  138. callbackDone bool // callback already called; don't do it again
  139. }
  140. func (w *walker) enqueue(it walkItem) {
  141. select {
  142. case w.enqueuec <- it:
  143. case <-w.donec:
  144. }
  145. }
  146. func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error {
  147. joined := dirName + string(os.PathSeparator) + baseName
  148. if typ == os.ModeDir {
  149. w.enqueue(walkItem{dir: joined})
  150. return nil
  151. }
  152. err := w.fn(joined, typ)
  153. if typ == os.ModeSymlink {
  154. if err == TraverseLink {
  155. // Set callbackDone so we don't call it twice for both the
  156. // symlink-as-symlink and the symlink-as-directory later:
  157. w.enqueue(walkItem{dir: joined, callbackDone: true})
  158. return nil
  159. }
  160. if err == filepath.SkipDir {
  161. // Permit SkipDir on symlinks too.
  162. return nil
  163. }
  164. }
  165. return err
  166. }
  167. func (w *walker) walk(root string, runUserCallback bool) error {
  168. if runUserCallback {
  169. err := w.fn(root, os.ModeDir)
  170. if err == filepath.SkipDir {
  171. return nil
  172. }
  173. if err != nil {
  174. return err
  175. }
  176. }
  177. return readDir(root, w.onDirEnt)
  178. }