From e8e85d3b964ceee786bf6517cef8daca69f805f4 Mon Sep 17 00:00:00 2001 From: Michael Woolnough Date: Tue, 17 Dec 2024 15:31:57 +0000 Subject: [PATCH 1/8] Rewrite walk to use less RAM --- go.mod | 1 - go.sum | 2 - walk/dirent.go | 134 ++++++------------- walk/dirent_test.go | 7 - walk/file.go | 15 ++- walk/walk.go | 305 +++++++++++++++++++++++++++++--------------- walk/walk_test.go | 2 +- 7 files changed, 252 insertions(+), 214 deletions(-) diff --git a/go.mod b/go.mod index 89091984..f2db33c3 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,6 @@ require ( github.com/smartystreets/goconvey v1.7.2 github.com/spf13/cobra v1.8.1 github.com/termie/go-shutil v0.0.0-20140729215957-bcacb06fecae - github.com/wtsi-hgi/godirwalk v1.18.1 github.com/wtsi-ssg/wr v0.5.9 ) diff --git a/go.sum b/go.sum index 9bc6b957..064a3a97 100644 --- a/go.sum +++ b/go.sum @@ -284,8 +284,6 @@ github.com/tklauser/numcpus v0.9.0 h1:lmyCHtANi8aRUgkckBgoDk1nHCux3n2cgkJLXdQGPD github.com/tklauser/numcpus v0.9.0/go.mod h1:SN6Nq1O3VychhC1npsWostA+oW+VOQTxZrS604NSRyI= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= -github.com/wtsi-hgi/godirwalk v1.18.1 h1:t7eaGXYBfTtfIEGLizPCC9fzASTvZtdhKEEri8TyyJs= -github.com/wtsi-hgi/godirwalk v1.18.1/go.mod h1:rLa4FlI9kdT7o67jwFos8qgaX3K2sMC6XI4FXJ1iVyk= github.com/wtsi-ssg/wr v0.5.9 h1:lJWNuJfVvhTpXQqxRN5RbffhvK3HMog0fFpUFznvoz8= github.com/wtsi-ssg/wr v0.5.9/go.mod h1:njSdCX+xv1xzzw3Oy3Smid6s/IyIQEvLsKbRwaq4fC8= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/walk/dirent.go b/walk/dirent.go index 4a51a898..8ef168bd 100644 --- a/walk/dirent.go +++ b/walk/dirent.go @@ -29,114 +29,64 @@ package walk import ( "io/fs" "os" - "sync" - "unsafe" - - "github.com/wtsi-hgi/godirwalk" -) - -var ( - filePathPool64 = sync.Pool{New: func() any { x := make(FilePath, 0, 64); return &x }} //nolint:gochecknoglobals,mnd,nlreturn,lll - filePathPool128 = sync.Pool{New: func() any { x := make(FilePath, 0, 128); return &x }} //nolint:gochecknoglobals,mnd,nlreturn,lll - filePathPool256 = sync.Pool{New: func() any { x := make(FilePath, 0, 256); return &x }} //nolint:gochecknoglobals,mnd,nlreturn,lll - filePathPool512 = sync.Pool{New: func() any { x := make(FilePath, 0, 512); return &x }} //nolint:gochecknoglobals,mnd,nlreturn,lll - filePathPool1024 = sync.Pool{New: func() any { x := make(FilePath, 0, 1024); return &x }} //nolint:gochecknoglobals,mnd,nlreturn,lll - filePathPool2048 = sync.Pool{New: func() any { x := make(FilePath, 0, 2048); return &x }} //nolint:gochecknoglobals,mnd,nlreturn,lll - filePathPool4096 = sync.Pool{New: func() any { x := make(FilePath, 0, 4096); return &x }} //nolint:gochecknoglobals,mnd,nlreturn,lll + "strings" ) -// FilePath is a byte-slice of a path, utilising object pools to reduce memory -// allocations. -// -// It is the clients responsibility to call the Done method once it is no longer -// needed. -type FilePath []byte - -func newFilePathSize(size int) *FilePath { - switch { - case size <= 64: //nolint:mnd - return filePathPool64.Get().(*FilePath) //nolint:forcetypeassert - case size <= 128: //nolint:mnd - return filePathPool128.Get().(*FilePath) //nolint:forcetypeassert - case size <= 256: //nolint:mnd - return filePathPool256.Get().(*FilePath) //nolint:forcetypeassert - case size <= 512: //nolint:mnd - return filePathPool512.Get().(*FilePath) //nolint:forcetypeassert - case size <= 1024: //nolint:mnd - return filePathPool1024.Get().(*FilePath) //nolint:forcetypeassert - case size <= 2048: //nolint:mnd - return filePathPool2048.Get().(*FilePath) //nolint:forcetypeassert - } - - return filePathPool4096.Get().(*FilePath) //nolint:forcetypeassert +// FilePath is a byte-slice of a path. +type FilePath struct { + parent *FilePath + name string + depth uint16 } // NewFilePath creates a new FilePath, setting the value to the given string. -func NewFilePath(path string) *FilePath { - c := newFilePathSize(len(path)) - c.writeString(path) - - return c +func NewFilePath(path string) FilePath { + return FilePath{name: path} } -func (f *FilePath) writeString(str string) { - *f = append(*f, str...) +func (f *FilePath) appendTo(p []byte) []byte { + if f.parent != nil { + p = f.parent.appendTo(p) + } + + return append(p, f.name...) } -func (f *FilePath) writeBytes(p []byte) { - *f = append(*f, p...) +// Bytes returns the FilePath as a literal byte-slice. +func (f *FilePath) Bytes() []byte { + return f.appendTo(nil) } -// Done deallocates the underlying byte-slice; any uses of the Bytes method are -// now invalid and may change. -func (f *FilePath) Done() { //nolint:gocyclo - *f = (*f)[:0] - - switch cap(*f) { - case 64: //nolint:mnd - filePathPool64.Put(f) - case 128: //nolint:mnd - filePathPool128.Put(f) - case 256: //nolint:mnd - filePathPool256.Put(f) - case 512: //nolint:mnd - filePathPool512.Put(f) - case 1024: //nolint:mnd - filePathPool1024.Put(f) - case 2048: //nolint:mnd - filePathPool2048.Put(f) - case 4096: //nolint:mnd - filePathPool4096.Put(f) +func (f *FilePath) compare(g *FilePath) int { + if f.depth < g.depth { + return f.compareTo(g.getDepth(f.depth)) + } else if f.depth > g.depth { + return f.getDepth(g.depth).compareTo(g) } -} -func (f *FilePath) sub(d *godirwalk.Dirent) *FilePath { - name := d.Name() - size := len(*f) + len(name) + return f.compareTo(g) +} - if d.IsDir() { - size++ +func (f *FilePath) getDepth(n uint16) *FilePath { + for f.depth != n { + f = f.parent } - c := newFilePathSize(size) - - c.writeBytes(*f) - c.writeString(name) + return f +} - if d.IsDir() { - c.writeString("/") +func (f *FilePath) compareTo(g *FilePath) int { + if f == g { + return 0 } - return c -} + cmp := f.parent.compareTo(g.parent) -// Bytes returns the FilePath as a literal byte-slice. -func (f *FilePath) Bytes() []byte { - return *f -} + if cmp == 0 { + return strings.Compare(f.name, g.name) + } -func (f *FilePath) string() string { - return unsafe.String(&(*f)[0], len(*f)) + return cmp } // Dirent represents a file system directory entry (a file or a directory), @@ -144,21 +94,15 @@ func (f *FilePath) string() string { type Dirent struct { // Path is the complete path to the directory entry (including both // directory and basename) - Path *FilePath + Path FilePath // Type is the type bits of the file mode of this entry. - Type os.FileMode + Type fs.FileMode // Inode is the file system inode number for this entry. Inode uint64 } -// newDirentForDirectoryPath returns a Dirent for the given directory, with -// a Type for directories and no Inode. -func newDirentForDirectoryPath(dir string) Dirent { - return Dirent{Path: NewFilePath(dir), Type: fs.ModeDir} -} - // IsDir returns true if we are a directory. func (d *Dirent) IsDir() bool { return d.Type.IsDir() diff --git a/walk/dirent_test.go b/walk/dirent_test.go index 7bfa648c..2775222b 100644 --- a/walk/dirent_test.go +++ b/walk/dirent_test.go @@ -71,11 +71,4 @@ func TestDirent(t *testing.T) { So(d.IsRegular(), ShouldBeFalse) So(d.IsSymlink(), ShouldBeTrue) }) - - Convey("You can make a fake Direct for directories", t, func() { - d := newDirentForDirectoryPath("/a/dir") - So(d.IsDir(), ShouldBeTrue) - So(d.IsRegular(), ShouldBeFalse) - So(d.IsSymlink(), ShouldBeFalse) - }) } diff --git a/walk/file.go b/walk/file.go index 21b0b03a..3e1f4ad5 100644 --- a/walk/file.go +++ b/walk/file.go @@ -33,10 +33,13 @@ import ( "path/filepath" "strconv" "sync" + "unsafe" ) const userOnlyPerm = 0700 +const maxPathLength = 4096 + // non-ascii bytes could become \xXX (4x the length at worst), the two // speech-marks are +2 and a newline is +1. const maxQuotedPathLength = 4096*4 + 2 + 1 @@ -162,12 +165,16 @@ func NewFiles(outDir string, n int) (*Files, error) { // // It will terminate the walk if writes to our output files fail. func (f *Files) WritePaths() PathCallback { - var quoted [maxQuotedPathLength]byte + var ( + quoted [maxQuotedPathLength]byte + tmpPath [maxPathLength]byte + ) return func(entry *Dirent) error { - defer entry.Path.Done() - - return f.writePath(append(strconv.AppendQuote(quoted[:0], entry.Path.string()), '\n')) + return f.writePath(append( + strconv.AppendQuote( + quoted[:0], unsafe.String(&tmpPath[0], len(entry.Path.appendTo(tmpPath[:0]))), + ), '\n')) } } diff --git a/walk/walk.go b/walk/walk.go index a42f3d73..0ae54490 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -30,15 +30,16 @@ package walk import ( + "bytes" "context" + "errors" + "io/fs" "os" "path/filepath" "slices" - "sort" - "strings" "sync" - - "github.com/wtsi-hgi/godirwalk" + "syscall" + "unsafe" ) const walkers = 16 @@ -78,16 +79,9 @@ func New(cb PathCallback, includDirs, ignoreSymlinks bool) *Walker { type ErrorCallback func(path string, err error) type pathRequest struct { - path *FilePath - response chan []Dirent -} - -var pathRequestPool = sync.Pool{ //nolint:gochecknoglobals - New: func() any { - return &pathRequest{ - response: make(chan []Dirent), - } - }, + Dirent + next *pathRequest + ready sync.Mutex } // Walk will discover all the paths nested under the given dir, and send them to @@ -102,45 +96,43 @@ func (w *Walker) Walk(dir string, errCB ErrorCallback) error { dir = filepath.Clean(dir) + "/" requestCh := make(chan *pathRequest) sortedRequestCh := make(chan *pathRequest) - direntCh := make(chan Dirent, dirsChSize) - flowControl := newController() ctx, stop := context.WithCancel(context.Background()) for range walkers { - go w.handleDirReads(ctx, sortedRequestCh, errCB) + go w.handleDirReads(ctx, sortedRequestCh, requestCh, errCB, w.ignoreSymlinks) } - go func() { - walkDirectory(ctx, newDirentForDirectoryPath(dir), - flowControl, createPathRequestor(requestCh), w.sendDirs) - close(direntCh) - }() - go sortPathRequests(ctx, requestCh, sortedRequestCh) - go flowControl.PassControl(direntCh) - defer stop() - - return w.sendDirentsToPathCallback(direntCh) -} + r := &pathRequest{ + Dirent: Dirent{ + Path: NewFilePath(dir), + Type: fs.ModeDir, + Inode: 0, + }, + } -func createPathRequestor(requestCh chan *pathRequest) func(*FilePath) []Dirent { - return func(path *FilePath) []Dirent { - pr := pathRequestPool.Get().(*pathRequest) //nolint:errcheck,forcetypeassert - defer pathRequestPool.Put(pr) + r.ready.Lock() - pr.path = path + sortedRequestCh <- r - requestCh <- pr + defer stop() - return <-pr.response - } + return w.sendDirentsToPathCallback(r) } -func (w *Walker) sendDirentsToPathCallback(direntCh <-chan Dirent) error { - for dirent := range direntCh { - if err := w.pathCB(&dirent); err != nil { - return err +func (w *Walker) sendDirentsToPathCallback(r *pathRequest) error { + for ; r != nil; r = r.next { + isDir := r.IsDir() + + if w.sendDirs || !isDir { + if err := w.pathCB(&r.Dirent); err != nil { + return err + } + } + + if isDir { + r.ready.Lock() } } @@ -150,7 +142,7 @@ func (w *Walker) sendDirentsToPathCallback(direntCh <-chan Dirent) error { type heap []*pathRequest func pathCompare(a, b *pathRequest) int { - return strings.Compare(b.path.string(), a.path.string()) + return b.Path.compare(&a.Path) } func (h *heap) Insert(req *pathRequest) { @@ -195,113 +187,218 @@ func sortPathRequests(ctx context.Context, requestCh <-chan *pathRequest, //noli } } -func (w *Walker) handleDirReads(ctx context.Context, requests chan *pathRequest, errCB ErrorCallback) { +func (w *Walker) handleDirReads(ctx context.Context, sortedRequests, requestCh chan *pathRequest, + errCB ErrorCallback, ignoreSymlinks bool) { buffer := make([]byte, os.Getpagesize()) + var pathBuffer [maxPathLength + 1]byte + Loop: for { select { case <-ctx.Done(): break Loop - case request := <-requests: - children, err := godirwalk.ReadDirents(request.path.string(), buffer) - if err != nil { - errCB(string(request.path.Bytes()), err) + case request := <-sortedRequests: + l := len(request.Path.appendTo(pathBuffer[:0])) + pathBuffer[l] = 0 + + if err := scan(buffer, &pathBuffer[0], request, ignoreSymlinks); err != nil { + errCB(string(pathBuffer[:l]), err) } - request.response <- w.childrenToDirents(children, request.path) + go scanChildDirs(requestCh, request) } } } -func (w *Walker) childrenToDirents(children godirwalk.Dirents, parent *FilePath) []Dirent { - dirents := make([]Dirent, 0, len(children)) +func scanChildDirs(requestCh chan *pathRequest, request *pathRequest) { + for p, r := &request.Path, request.next; r != nil && r.Path.parent == p; { + next := r.next - for _, child := range children { - dirent := Dirent{ - Path: parent.sub(child), - Type: child.ModeType(), - Inode: child.Inode(), + if r.IsDir() { + requestCh <- r } - if w.ignoreSymlinks && dirent.IsSymlink() { - continue + r = next + } +} + +type scanner struct { + buffer, read []byte + fh int + syscall.Dirent + err error +} + +func (s *scanner) Next() bool { + for len(s.read) == 0 { + n, err := syscall.ReadDirent(s.fh, s.buffer) + if err != nil { + if errors.Is(err, syscall.EINTR) { + continue + } + + s.err = err + + return false + } + + if n <= 0 { + return false } - dirents = append(dirents, dirent) + s.read = s.buffer[:n] } - sort.Slice(dirents, func(i, j int) bool { - return dirents[i].Path.string() < dirents[j].Path.string() - }) + copy((*[unsafe.Sizeof(syscall.Dirent{})]byte)(unsafe.Pointer(&s.Dirent))[:], s.read) + s.read = s.read[s.Reclen:] - return dirents + return true } -type flowController struct { - controller chan chan<- Dirent -} +func (s *scanner) Get() (string, fs.FileMode, uint64) { + mode := s.getMode() -func newController() *flowController { - return controllerPool.Get().(*flowController) //nolint:forcetypeassert + return s.getName(mode.IsDir()), mode, s.Ino } -func (f *flowController) GetControl() chan<- Dirent { - return <-f.controller -} +func (s *scanner) getName(isDir bool) string { //nolint:gocyclo + n := s.Dirent.Name[:] + name := *(*[]byte)(unsafe.Pointer(&n)) + + l := bytes.IndexByte(name, 0) + if l < 0 || l == 1 && s.Dirent.Name[0] == '.' || l == 2 && s.Dirent.Name[0] == '.' && s.Dirent.Name[1] == '.' { + return "" + } -func (f *flowController) PassControl(control chan<- Dirent) { - f.controller <- control - <-f.controller + if isDir { + s.Dirent.Name[l] = '/' + l++ + } + + return string(name[:l]) } -func (f *flowController) EndControl() { - f.controller <- nil - controllerPool.Put(f) +func (s *scanner) getMode() fs.FileMode { + switch s.Type { + case syscall.DT_DIR: + return fs.ModeDir + case syscall.DT_LNK: + return fs.ModeSymlink + case syscall.DT_CHR: + return fs.ModeDevice | fs.ModeCharDevice + case syscall.DT_BLK: + return fs.ModeDevice + case syscall.DT_FIFO: + return fs.ModeNamedPipe + case syscall.DT_SOCK: + return fs.ModeSocket + } + + return 0 } -var controllerPool = sync.Pool{ //nolint:gochecknoglobals - New: func() any { - return &flowController{ - controller: make(chan chan<- Dirent), +func scan(buffer []byte, path *byte, request *pathRequest, ignoreSymlinks bool) error { + defer request.ready.Unlock() + + fh, err := open(path) + if err != nil { + return err + } + + defer syscall.Close(fh) + + s := scanner{ + buffer: buffer, + fh: fh, + } + + var last *pathRequest + + for s.Next() { + name, mode, inode := s.Get() + if inode == 0 || name == "" || ignoreSymlinks && mode&fs.ModeSymlink != 0 { + continue } - }, + + last = addDirent(request, last, name, mode, inode) + } + + return nil } -func walkDirectory(ctx context.Context, dirent Dirent, - flowControl *flowController, request func(*FilePath) []Dirent, sendDirs bool) { - children := request(dirent.Path) - childControllers := make([]*flowController, len(children)) +func open(path *byte) (int, error) { + const atFDCWD = -0x64 - for n, child := range children { - if child.IsDir() { - childControllers[n] = newController() + dfd := atFDCWD - go walkDirectory(ctx, child, childControllers[n], request, sendDirs) - } + ifh, _, err := syscall.Syscall6( + syscall.SYS_OPENAT, + uintptr(dfd), + uintptr(unsafe.Pointer(path)), + uintptr(syscall.O_RDONLY), + uintptr(0), 0, 0) + if err != 0 { + return 0, err } - control := flowControl.GetControl() + return int(ifh), nil +} - if sendDirs { - sendEntry(ctx, dirent, control) +func addDirent(request, last *pathRequest, name string, + mode fs.FileMode, inode uint64) *pathRequest { + d := &pathRequest{ + Dirent: Dirent{ + Path: FilePath{ + parent: &request.Path, + name: name, + depth: request.Path.depth + 1, + }, + Type: mode, + Inode: inode, + }, } - for n, childController := range childControllers { - if childController == nil { - sendEntry(ctx, children[n], control) - } else { - childController.PassControl(control) - } + if mode.IsDir() { + d.ready.Lock() + } + + return insertDirent(request, last, d) +} + +func insertDirent(request, last, d *pathRequest) *pathRequest { + if last == nil { + return addFirst(request, d) + } else if last.Path.name < d.Path.name { + return insertAtEnd(last, d) } - flowControl.EndControl() + insertIntoList(request, last, d) + + return last +} + +func addFirst(request, d *pathRequest) *pathRequest { + d.next = request.next + request.next = d + + return d +} + +func insertAtEnd(last, d *pathRequest) *pathRequest { + d.next = last.next + last.next = d + + return d } -func sendEntry(ctx context.Context, dirent Dirent, direntCh chan<- Dirent) { - select { - case <-ctx.Done(): - return - case direntCh <- dirent: +func insertIntoList(request, last, d *pathRequest) { + for curr := &request.next; curr != &last.next; curr = &(*curr).next { + if d.Path.name < (*curr).Path.name { + d.next = *curr + *curr = d + + return + } } } diff --git a/walk/walk_test.go b/walk/walk_test.go index e1645d45..c9e55809 100644 --- a/walk/walk_test.go +++ b/walk/walk_test.go @@ -60,7 +60,7 @@ func TestWalk(t *testing.T) { walkErrors = append(walkErrors, err) } - Convey("You can output the paths to a file", func() { + FocusConvey("You can output the paths to a file", func() { ok := testOutputToFiles(true, false, walkDir, outDir, cb, expectedPaths) So(ok, ShouldBeTrue) So(len(walkErrors), ShouldEqual, 0) From 4f546006dbf8fa128a2b7584a07b94326b5bc8f2 Mon Sep 17 00:00:00 2001 From: Michael Woolnough Date: Tue, 17 Dec 2024 15:48:31 +0000 Subject: [PATCH 2/8] Pass context to scanChildDirs --- walk/walk.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/walk/walk.go b/walk/walk.go index 0ae54490..e0a53baa 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -206,17 +206,21 @@ Loop: errCB(string(pathBuffer[:l]), err) } - go scanChildDirs(requestCh, request) + go scanChildDirs(ctx, requestCh, request) } } } -func scanChildDirs(requestCh chan *pathRequest, request *pathRequest) { +func scanChildDirs(ctx context.Context, requestCh chan *pathRequest, request *pathRequest) { for p, r := &request.Path, request.next; r != nil && r.Path.parent == p; { next := r.next if r.IsDir() { - requestCh <- r + select { + case <-ctx.Done(): + return + case requestCh <- r: + } } r = next From 2a5acb0cd10924c81a24a383f219e126659d4669 Mon Sep 17 00:00:00 2001 From: Michael Woolnough Date: Wed, 18 Dec 2024 13:40:13 +0000 Subject: [PATCH 3/8] Use sync.Pools to reuse memory --- walk/dirent.go | 172 +++++++++++++++++++++++++++++++++------------- walk/file.go | 2 +- walk/walk.go | 158 ++++++++++++++++-------------------------- walk/walk_test.go | 13 +++- 4 files changed, 193 insertions(+), 152 deletions(-) diff --git a/walk/dirent.go b/walk/dirent.go index 8ef168bd..9ce16bb4 100644 --- a/walk/dirent.go +++ b/walk/dirent.go @@ -27,93 +27,167 @@ package walk import ( + "bytes" "io/fs" "os" - "strings" + "sync" ) -// FilePath is a byte-slice of a path. -type FilePath struct { - parent *FilePath - name string +func newDirentPool(size int) *sync.Pool { + return &sync.Pool{ + New: func() any { + return &Dirent{ + name: make([]byte, 0, size), + } + }, + } +} + +var ( + direntPool0 = newDirentPool(0) //nolint:gochecknoglobals + direntPool32 = newDirentPool(32) //nolint:gochecknoglobals,mnd + direntPool64 = newDirentPool(64) //nolint:gochecknoglobals,mnd + direntPool128 = newDirentPool(128) //nolint:gochecknoglobals,mnd + dirEntPool256 = newDirentPool(257) //nolint:gochecknoglobals,mnd +) + +func getDirent(size int) *Dirent { + switch { + case size == 0: + return direntPool0.Get().(*Dirent) //nolint:forcetypeassert + case size <= 32: //nolint:mnd + return direntPool32.Get().(*Dirent) //nolint:forcetypeassert + case size <= 64: //nolint:mnd + return direntPool64.Get().(*Dirent) //nolint:forcetypeassert + case size <= 128: //nolint:mnd + return direntPool128.Get().(*Dirent) //nolint:forcetypeassert + } + + return dirEntPool256.Get().(*Dirent) //nolint:forcetypeassert +} + +func putDirent(d *Dirent) { + d.name = d.name[:0] + d.parent = nil + + switch cap(d.name) { + case 0: + direntPool0.Put(d) + case 32: //nolint:mnd + direntPool32.Put(d) + case 64: //nolint:mnd + direntPool64.Put(d) + case 128: //nolint:mnd + direntPool128.Put(d) + default: + dirEntPool256.Put(d) + } +} + +// Dirent represents a file system directory entry (a file or a directory), +// providing information about the entry's path, type and inode. +type Dirent struct { + parent *Dirent + name []byte depth uint16 + + // Type is the type bits of the file mode of this entry. + Type fs.FileMode + + // Inode is the file system inode number for this entry. + Inode uint64 + + next *Dirent + ready sync.Mutex +} + +// IsDir returns true if we are a directory. +func (d *Dirent) IsDir() bool { + return d.Type.IsDir() +} + +// IsRegular returns true if we are a regular file. +func (d *Dirent) IsRegular() bool { + return d.Type.IsRegular() } -// NewFilePath creates a new FilePath, setting the value to the given string. -func NewFilePath(path string) FilePath { - return FilePath{name: path} +// IsSymlink returns true if we are a symlink. +func (d *Dirent) IsSymlink() bool { + return d.Type&os.ModeSymlink != 0 } -func (f *FilePath) appendTo(p []byte) []byte { - if f.parent != nil { - p = f.parent.appendTo(p) +func (d *Dirent) appendTo(p []byte) []byte { + if d.parent != nil { + p = d.parent.appendTo(p) } - return append(p, f.name...) + return append(p, d.name...) } // Bytes returns the FilePath as a literal byte-slice. -func (f *FilePath) Bytes() []byte { - return f.appendTo(nil) +func (d *Dirent) Bytes() []byte { + return d.appendTo(nil) } -func (f *FilePath) compare(g *FilePath) int { - if f.depth < g.depth { - return f.compareTo(g.getDepth(f.depth)) - } else if f.depth > g.depth { - return f.getDepth(g.depth).compareTo(g) +func (d *Dirent) compare(e *Dirent) int { + if d.depth < e.depth { + e = e.getDepth(d.depth) + } else if d.depth > e.depth { + d = d.getDepth(e.depth) } - return f.compareTo(g) + return e.compareTo(d) } -func (f *FilePath) getDepth(n uint16) *FilePath { - for f.depth != n { - f = f.parent +func (d *Dirent) getDepth(n uint16) *Dirent { + for d.depth != n { + d = d.parent } - return f + return d } -func (f *FilePath) compareTo(g *FilePath) int { - if f == g { +func (d *Dirent) compareTo(e *Dirent) int { + if d == e { return 0 } - cmp := f.parent.compareTo(g.parent) + cmp := d.parent.compareTo(e.parent) if cmp == 0 { - return strings.Compare(f.name, g.name) + return bytes.Compare(d.name, e.name) } return cmp } -// Dirent represents a file system directory entry (a file or a directory), -// providing information about the entry's path, type and inode. -type Dirent struct { - // Path is the complete path to the directory entry (including both - // directory and basename) - Path FilePath +func (d *Dirent) sub(name []byte, mode fs.FileMode, inode uint64) *Dirent { + de := getDirent(len(name)) - // Type is the type bits of the file mode of this entry. - Type fs.FileMode + de.parent = d + de.name = append(de.name, name...) + de.depth = d.depth + 1 + de.Type = mode + de.Inode = inode - // Inode is the file system inode number for this entry. - Inode uint64 -} + if mode.IsDir() { + de.ready.Lock() + } -// IsDir returns true if we are a directory. -func (d *Dirent) IsDir() bool { - return d.Type.IsDir() + return de } -// IsRegular returns true if we are a regular file. -func (d *Dirent) IsRegular() bool { - return d.Type.IsRegular() -} +func (d *Dirent) done() *Dirent { + next := d.next + d.next = nil -// IsSymlink returns true if we are a symlink. -func (d *Dirent) IsSymlink() bool { - return d.Type&os.ModeSymlink != 0 + if len(d.name) == 0 { + putDirent(d.parent) + } + + if !d.IsDir() { + putDirent(d) + } + + return next } diff --git a/walk/file.go b/walk/file.go index 3e1f4ad5..3e734d8c 100644 --- a/walk/file.go +++ b/walk/file.go @@ -173,7 +173,7 @@ func (f *Files) WritePaths() PathCallback { return func(entry *Dirent) error { return f.writePath(append( strconv.AppendQuote( - quoted[:0], unsafe.String(&tmpPath[0], len(entry.Path.appendTo(tmpPath[:0]))), + quoted[:0], unsafe.String(&tmpPath[0], len(entry.appendTo(tmpPath[:0]))), ), '\n')) } } diff --git a/walk/walk.go b/walk/walk.go index e0a53baa..c1429d88 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -37,7 +37,6 @@ import ( "os" "path/filepath" "slices" - "sync" "syscall" "unsafe" ) @@ -78,12 +77,6 @@ func New(cb PathCallback, includDirs, ignoreSymlinks bool) *Walker { // will be provided problematic paths encountered during the walk. type ErrorCallback func(path string, err error) -type pathRequest struct { - Dirent - next *pathRequest - ready sync.Mutex -} - // Walk will discover all the paths nested under the given dir, and send them to // our PathCallback. // @@ -94,22 +87,20 @@ type pathRequest struct { // method won't return an error. func (w *Walker) Walk(dir string, errCB ErrorCallback) error { dir = filepath.Clean(dir) + "/" - requestCh := make(chan *pathRequest) - sortedRequestCh := make(chan *pathRequest) + requestCh := make(chan *Dirent) + sortedRequestCh := make(chan *Dirent) ctx, stop := context.WithCancel(context.Background()) for range walkers { go w.handleDirReads(ctx, sortedRequestCh, requestCh, errCB, w.ignoreSymlinks) } - go sortPathRequests(ctx, requestCh, sortedRequestCh) + go sortDirents(ctx, requestCh, sortedRequestCh) - r := &pathRequest{ - Dirent: Dirent{ - Path: NewFilePath(dir), - Type: fs.ModeDir, - Inode: 0, - }, + r := &Dirent{ + name: []byte(dir), + Type: fs.ModeDir, + Inode: 0, //TODO } r.ready.Lock() @@ -121,36 +112,43 @@ func (w *Walker) Walk(dir string, errCB ErrorCallback) error { return w.sendDirentsToPathCallback(r) } -func (w *Walker) sendDirentsToPathCallback(r *pathRequest) error { - for ; r != nil; r = r.next { - isDir := r.IsDir() - - if w.sendDirs || !isDir { - if err := w.pathCB(&r.Dirent); err != nil { +func (w *Walker) sendDirentsToPathCallback(r *Dirent) error { + for ; r != nil; r = r.done() { + if len(r.name) > 0 { + if err := w.sendDirentToPathCallback(r); err != nil { return err } } - - if isDir { - r.ready.Lock() - } } return nil } -type heap []*pathRequest +func (w *Walker) sendDirentToPathCallback(r *Dirent) error { + isDir := r.IsDir() + + if w.sendDirs || !isDir { + if err := w.pathCB(r); err != nil { + return err + } + } + + if isDir { + r.ready.Lock() + r.ready.Unlock() //nolint:staticcheck + } -func pathCompare(a, b *pathRequest) int { - return b.Path.compare(&a.Path) + return nil } -func (h *heap) Insert(req *pathRequest) { - pos, _ := slices.BinarySearchFunc(*h, req, pathCompare) +type heap []*Dirent + +func (h *heap) Insert(req *Dirent) { + pos, _ := slices.BinarySearchFunc(*h, req, (*Dirent).compare) *h = slices.Insert(*h, pos, req) } -func (h heap) Top() *pathRequest { +func (h heap) Top() *Dirent { return h[len(h)-1] } @@ -158,12 +156,12 @@ func (h *heap) Pop() { *h = (*h)[:len(*h)-1] } -func (h *heap) Push(req *pathRequest) { +func (h *heap) Push(req *Dirent) { *h = append(*h, req) } -func sortPathRequests(ctx context.Context, requestCh <-chan *pathRequest, //nolint:gocyclo - sortedRequestCh chan<- *pathRequest) { +func sortDirents(ctx context.Context, requestCh <-chan *Dirent, //nolint:gocyclo + sortedRequestCh chan<- *Dirent) { var h heap for { @@ -187,7 +185,7 @@ func sortPathRequests(ctx context.Context, requestCh <-chan *pathRequest, //noli } } -func (w *Walker) handleDirReads(ctx context.Context, sortedRequests, requestCh chan *pathRequest, +func (w *Walker) handleDirReads(ctx context.Context, sortedRequests, requestCh chan *Dirent, errCB ErrorCallback, ignoreSymlinks bool) { buffer := make([]byte, os.Getpagesize()) @@ -199,7 +197,7 @@ Loop: case <-ctx.Done(): break Loop case request := <-sortedRequests: - l := len(request.Path.appendTo(pathBuffer[:0])) + l := len(request.appendTo(pathBuffer[:0])) pathBuffer[l] = 0 if err := scan(buffer, &pathBuffer[0], request, ignoreSymlinks); err != nil { @@ -211,8 +209,10 @@ Loop: } } -func scanChildDirs(ctx context.Context, requestCh chan *pathRequest, request *pathRequest) { - for p, r := &request.Path, request.next; r != nil && r.Path.parent == p; { +func scanChildDirs(ctx context.Context, requestCh chan *Dirent, request *Dirent) { + defer request.ready.Unlock() + + for r := request.next; len(r.name) != 0; { next := r.next if r.IsDir() { @@ -260,19 +260,24 @@ func (s *scanner) Next() bool { return true } -func (s *scanner) Get() (string, fs.FileMode, uint64) { +func (s *scanner) Get() ([]byte, fs.FileMode, uint64) { mode := s.getMode() return s.getName(mode.IsDir()), mode, s.Ino } -func (s *scanner) getName(isDir bool) string { //nolint:gocyclo +var ( + dot = []byte{'.', '\x00'} //nolint:gochecknoglobals + dotdot = []byte{'.', '.', '\x00'} //nolint:gochecknoglobals +) + +func (s *scanner) getName(isDir bool) []byte { n := s.Dirent.Name[:] name := *(*[]byte)(unsafe.Pointer(&n)) l := bytes.IndexByte(name, 0) - if l < 0 || l == 1 && s.Dirent.Name[0] == '.' || l == 2 && s.Dirent.Name[0] == '.' && s.Dirent.Name[1] == '.' { - return "" + if l <= 0 || bytes.Equal(name[:2], dot) || bytes.Equal(name[:3], dotdot) { + return nil } if isDir { @@ -280,7 +285,7 @@ func (s *scanner) getName(isDir bool) string { //nolint:gocyclo l++ } - return string(name[:l]) + return name[:l] } func (s *scanner) getMode() fs.FileMode { @@ -302,8 +307,10 @@ func (s *scanner) getMode() fs.FileMode { return 0 } -func scan(buffer []byte, path *byte, request *pathRequest, ignoreSymlinks bool) error { - defer request.ready.Unlock() +func scan(buffer []byte, path *byte, request *Dirent, ignoreSymlinks bool) error { + marker := request.sub(nil, 0, 0) + marker.next = request.next + request.next = marker fh, err := open(path) if err != nil { @@ -317,15 +324,13 @@ func scan(buffer []byte, path *byte, request *pathRequest, ignoreSymlinks bool) fh: fh, } - var last *pathRequest - for s.Next() { name, mode, inode := s.Get() - if inode == 0 || name == "" || ignoreSymlinks && mode&fs.ModeSymlink != 0 { + if inode == 0 || len(name) == 0 || ignoreSymlinks && mode&fs.ModeSymlink != 0 { continue } - last = addDirent(request, last, name, mode, inode) + insertDirent(request, request.sub(name, mode, inode)) } return nil @@ -349,56 +354,9 @@ func open(path *byte) (int, error) { return int(ifh), nil } -func addDirent(request, last *pathRequest, name string, - mode fs.FileMode, inode uint64) *pathRequest { - d := &pathRequest{ - Dirent: Dirent{ - Path: FilePath{ - parent: &request.Path, - name: name, - depth: request.Path.depth + 1, - }, - Type: mode, - Inode: inode, - }, - } - - if mode.IsDir() { - d.ready.Lock() - } - - return insertDirent(request, last, d) -} - -func insertDirent(request, last, d *pathRequest) *pathRequest { - if last == nil { - return addFirst(request, d) - } else if last.Path.name < d.Path.name { - return insertAtEnd(last, d) - } - - insertIntoList(request, last, d) - - return last -} - -func addFirst(request, d *pathRequest) *pathRequest { - d.next = request.next - request.next = d - - return d -} - -func insertAtEnd(last, d *pathRequest) *pathRequest { - d.next = last.next - last.next = d - - return d -} - -func insertIntoList(request, last, d *pathRequest) { - for curr := &request.next; curr != &last.next; curr = &(*curr).next { - if d.Path.name < (*curr).Path.name { +func insertDirent(request, d *Dirent) { + for curr := &request.next; ; curr = &(*curr).next { + if name := (*curr).name; len(name) == 0 || bytes.Compare(d.name, name) == -1 { d.next = *curr *curr = d diff --git a/walk/walk_test.go b/walk/walk_test.go index c9e55809..768dcc70 100644 --- a/walk/walk_test.go +++ b/walk/walk_test.go @@ -152,11 +152,20 @@ func TestWalk(t *testing.T) { w := New(files.WritePaths(), true, false) err = w.Walk("/root", cb) So(err, ShouldBeNil) - So(len(walkErrors), ShouldEqual, 1) + + mu.Lock() + l := len(walkErrors) + mu.Unlock() + + So(l, ShouldEqual, 1) var writeError *WriteError - So(errors.As(walkErrors[0], &writeError), ShouldBeFalse) + mu.Lock() + err = walkErrors[0] + mu.Unlock() + + So(errors.As(err, &writeError), ShouldBeFalse) outPath := filepath.Join(outDir, "walk.1") _, err = os.ReadFile(outPath) From 2c12188789b9279657aaed1add61824a4b2ed872 Mon Sep 17 00:00:00 2001 From: Michael Woolnough Date: Thu, 19 Dec 2024 13:01:05 +0000 Subject: [PATCH 4/8] Speed up children sorting by using an AVL Tree implementation --- walk/dirent.go | 119 +++++++++++++++++++++++++++++++++++++++---------- walk/walk.go | 51 +++++++++++---------- 2 files changed, 123 insertions(+), 47 deletions(-) diff --git a/walk/dirent.go b/walk/dirent.go index 9ce16bb4..a18a2dae 100644 --- a/walk/dirent.go +++ b/walk/dirent.go @@ -37,7 +37,9 @@ func newDirentPool(size int) *sync.Pool { return &sync.Pool{ New: func() any { return &Dirent{ - name: make([]byte, 0, size), + name: make([]byte, 0, size), + parent: nullDirEnt, + next: nullDirEnt, } }, } @@ -49,8 +51,15 @@ var ( direntPool64 = newDirentPool(64) //nolint:gochecknoglobals,mnd direntPool128 = newDirentPool(128) //nolint:gochecknoglobals,mnd dirEntPool256 = newDirentPool(257) //nolint:gochecknoglobals,mnd + + nullDirEnt = new(Dirent) //nolint:gochecknoglobals ) +func init() { //nolint:gochecknoinits + nullDirEnt.parent = nullDirEnt + nullDirEnt.next = nullDirEnt +} + func getDirent(size int) *Dirent { switch { case size == 0: @@ -68,7 +77,9 @@ func getDirent(size int) *Dirent { func putDirent(d *Dirent) { d.name = d.name[:0] - d.parent = nil + d.parent = nullDirEnt + d.next = nullDirEnt + d.depth = 0 switch cap(d.name) { case 0: @@ -87,9 +98,9 @@ func putDirent(d *Dirent) { // Dirent represents a file system directory entry (a file or a directory), // providing information about the entry's path, type and inode. type Dirent struct { - parent *Dirent + parent *Dirent // left name []byte - depth uint16 + depth int16 // Type is the type bits of the file mode of this entry. Type fs.FileMode @@ -97,7 +108,7 @@ type Dirent struct { // Inode is the file system inode number for this entry. Inode uint64 - next *Dirent + next *Dirent // right ready sync.Mutex } @@ -139,7 +150,7 @@ func (d *Dirent) compare(e *Dirent) int { return e.compareTo(d) } -func (d *Dirent) getDepth(n uint16) *Dirent { +func (d *Dirent) getDepth(n int16) *Dirent { for d.depth != n { d = d.parent } @@ -161,25 +172,9 @@ func (d *Dirent) compareTo(e *Dirent) int { return cmp } -func (d *Dirent) sub(name []byte, mode fs.FileMode, inode uint64) *Dirent { - de := getDirent(len(name)) - - de.parent = d - de.name = append(de.name, name...) - de.depth = d.depth + 1 - de.Type = mode - de.Inode = inode - - if mode.IsDir() { - de.ready.Lock() - } - - return de -} - func (d *Dirent) done() *Dirent { next := d.next - d.next = nil + d.next = nullDirEnt if len(d.name) == 0 { putDirent(d.parent) @@ -191,3 +186,81 @@ func (d *Dirent) done() *Dirent { return next } + +func (d *Dirent) insert(e *Dirent) *Dirent { //nolint:gocyclo + if d == nullDirEnt { + return e + } + + switch bytes.Compare(d.name, e.name) { + case 1: + d.parent = d.parent.insert(e) + case -1: + d.next = d.next.insert(e) + } + + d.setDepth() + + switch d.parent.depth - d.next.depth { + case -2: + if d.next.parent.depth > d.next.next.depth { + d.next = d.next.rotateRight() + } + + return d.rotateLeft() + case 2: //nolint:mnd + if d.parent.next.depth > d.parent.parent.depth { + d.parent = d.parent.rotateLeft() + } + + return d.rotateRight() + } + + return d +} + +func (d *Dirent) setDepth() { + if d == nullDirEnt { + return + } + + if d.parent.depth > d.next.depth { + d.depth = d.parent.depth + 1 + } else { + d.depth = d.next.depth + 1 + } +} + +func (d *Dirent) rotateLeft() *Dirent { + n := d.next + d.next = n.parent + n.parent = d + + d.setDepth() + n.setDepth() + + return n +} + +func (d *Dirent) rotateRight() *Dirent { + n := d.parent + d.parent = n.next + n.next = d + + d.setDepth() + n.setDepth() + + return n +} + +func (d *Dirent) flatten(parent, prev *Dirent, depth int16) *Dirent { + if d == nullDirEnt { + return prev + } + + d.parent.flatten(parent, prev, depth).next = d + d.parent = parent + d.depth = depth + + return d.next.flatten(parent, d, depth) +} diff --git a/walk/walk.go b/walk/walk.go index c1429d88..674511fe 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -101,6 +101,7 @@ func (w *Walker) Walk(dir string, errCB ErrorCallback) error { name: []byte(dir), Type: fs.ModeDir, Inode: 0, //TODO + next: nullDirEnt, } r.ready.Lock() @@ -113,7 +114,7 @@ func (w *Walker) Walk(dir string, errCB ErrorCallback) error { } func (w *Walker) sendDirentsToPathCallback(r *Dirent) error { - for ; r != nil; r = r.done() { + for ; r != nullDirEnt; r = r.done() { if len(r.name) > 0 { if err := w.sendDirentToPathCallback(r); err != nil { return err @@ -200,22 +201,29 @@ Loop: l := len(request.appendTo(pathBuffer[:0])) pathBuffer[l] = 0 - if err := scan(buffer, &pathBuffer[0], request, ignoreSymlinks); err != nil { + root, err := scan(buffer, &pathBuffer[0], ignoreSymlinks) + if err != nil { errCB(string(pathBuffer[:l]), err) } - go scanChildDirs(ctx, requestCh, request) + go scanChildDirs(ctx, requestCh, request, root) } } } -func scanChildDirs(ctx context.Context, requestCh chan *Dirent, request *Dirent) { - defer request.ready.Unlock() +func scanChildDirs(ctx context.Context, requestCh chan *Dirent, request, root *Dirent) { + marker := getDirent(0) + marker.next = request.next + marker.parent = request + + root.flatten(request, request, request.depth+1).next = marker - for r := request.next; len(r.name) != 0; { + for r := request.next; r != marker; { next := r.next if r.IsDir() { + r.ready.Lock() + select { case <-ctx.Done(): return @@ -225,6 +233,8 @@ func scanChildDirs(ctx context.Context, requestCh chan *Dirent, request *Dirent) r = next } + + request.ready.Unlock() } type scanner struct { @@ -307,14 +317,12 @@ func (s *scanner) getMode() fs.FileMode { return 0 } -func scan(buffer []byte, path *byte, request *Dirent, ignoreSymlinks bool) error { - marker := request.sub(nil, 0, 0) - marker.next = request.next - request.next = marker +func scan(buffer []byte, path *byte, ignoreSymlinks bool) (*Dirent, error) { + root := nullDirEnt fh, err := open(path) if err != nil { - return err + return root, err } defer syscall.Close(fh) @@ -330,10 +338,16 @@ func scan(buffer []byte, path *byte, request *Dirent, ignoreSymlinks bool) error continue } - insertDirent(request, request.sub(name, mode, inode)) + de := getDirent(len(name)) + + de.name = append(de.name, name...) + de.Type = mode + de.Inode = inode + + root = root.insert(de) } - return nil + return root, nil } func open(path *byte) (int, error) { @@ -353,14 +367,3 @@ func open(path *byte) (int, error) { return int(ifh), nil } - -func insertDirent(request, d *Dirent) { - for curr := &request.next; ; curr = &(*curr).next { - if name := (*curr).name; len(name) == 0 || bytes.Compare(d.name, name) == -1 { - d.next = *curr - *curr = d - - return - } - } -} From b8fc7641905bf6f23c2ba339b0f73da339dcf05b Mon Sep 17 00:00:00 2001 From: Michael Woolnough Date: Thu, 19 Dec 2024 13:42:16 +0000 Subject: [PATCH 5/8] Get inode of passed path --- walk/walk.go | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/walk/walk.go b/walk/walk.go index 674511fe..1b6ef65a 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -86,6 +86,11 @@ type ErrorCallback func(path string, err error) // errors will mean the path isn't output, but the walk will continue and this // method won't return an error. func (w *Walker) Walk(dir string, errCB ErrorCallback) error { + inode, err := getInitialInode(dir) + if err != nil { + return err + } + dir = filepath.Clean(dir) + "/" requestCh := make(chan *Dirent) sortedRequestCh := make(chan *Dirent) @@ -100,7 +105,7 @@ func (w *Walker) Walk(dir string, errCB ErrorCallback) error { r := &Dirent{ name: []byte(dir), Type: fs.ModeDir, - Inode: 0, //TODO + Inode: inode, next: nullDirEnt, } @@ -113,6 +118,24 @@ func (w *Walker) Walk(dir string, errCB ErrorCallback) error { return w.sendDirentsToPathCallback(r) } +func getInitialInode(dir string) (uint64, error) { + fi, err := os.Stat(dir) + if err != nil { + return 0, err + } + + if !fi.IsDir() { + return 0, fs.ErrInvalid + } + + st, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return 0, fs.ErrInvalid + } + + return st.Ino, nil +} + func (w *Walker) sendDirentsToPathCallback(r *Dirent) error { for ; r != nullDirEnt; r = r.done() { if len(r.name) > 0 { From 9d86d92c345eb6a7f16d758f92d8d3c3e3e09952 Mon Sep 17 00:00:00 2001 From: Michael Woolnough Date: Thu, 19 Dec 2024 13:48:33 +0000 Subject: [PATCH 6/8] Delint --- walk/dirent.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/walk/dirent.go b/walk/dirent.go index a18a2dae..b130e878 100644 --- a/walk/dirent.go +++ b/walk/dirent.go @@ -63,16 +63,16 @@ func init() { //nolint:gochecknoinits func getDirent(size int) *Dirent { switch { case size == 0: - return direntPool0.Get().(*Dirent) //nolint:forcetypeassert + return direntPool0.Get().(*Dirent) //nolint:forcetypeassert,errcheck case size <= 32: //nolint:mnd - return direntPool32.Get().(*Dirent) //nolint:forcetypeassert + return direntPool32.Get().(*Dirent) //nolint:forcetypeassert,errcheck case size <= 64: //nolint:mnd - return direntPool64.Get().(*Dirent) //nolint:forcetypeassert + return direntPool64.Get().(*Dirent) //nolint:forcetypeassert,errcheck case size <= 128: //nolint:mnd - return direntPool128.Get().(*Dirent) //nolint:forcetypeassert + return direntPool128.Get().(*Dirent) //nolint:forcetypeassert,errcheck } - return dirEntPool256.Get().(*Dirent) //nolint:forcetypeassert + return dirEntPool256.Get().(*Dirent) //nolint:forcetypeassert,errcheck } func putDirent(d *Dirent) { From 1389294d08c4724e13ead53d8c371b5a51d55f2b Mon Sep 17 00:00:00 2001 From: Michael Woolnough Date: Thu, 19 Dec 2024 13:53:34 +0000 Subject: [PATCH 7/8] Unfocus test --- walk/walk_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/walk/walk_test.go b/walk/walk_test.go index 768dcc70..e6f15db3 100644 --- a/walk/walk_test.go +++ b/walk/walk_test.go @@ -60,7 +60,7 @@ func TestWalk(t *testing.T) { walkErrors = append(walkErrors, err) } - FocusConvey("You can output the paths to a file", func() { + Convey("You can output the paths to a file", func() { ok := testOutputToFiles(true, false, walkDir, outDir, cb, expectedPaths) So(ok, ShouldBeTrue) So(len(walkErrors), ShouldEqual, 0) From 18d8f946ccff3cfb55380591081aed3a927e6ed1 Mon Sep 17 00:00:00 2001 From: Michael Woolnough Date: Thu, 19 Dec 2024 14:02:22 +0000 Subject: [PATCH 8/8] Return the error returned while scanning directory --- walk/walk.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/walk/walk.go b/walk/walk.go index 1b6ef65a..673097d0 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -370,7 +370,7 @@ func scan(buffer []byte, path *byte, ignoreSymlinks bool) (*Dirent, error) { root = root.insert(de) } - return root, nil + return root, s.err } func open(path *byte) (int, error) {