Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: move parsing back to fastwalk #17

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 6 additions & 50 deletions fastwalk_getdirentries_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ import (
"sync"
"syscall"
"unsafe"

"github.com/charlievieth/fastwalk/internal/dirent"
)

// TODO: increase
const direntBufSize = 32 * 1024

var direntBufPool = sync.Pool{
Expand Down Expand Up @@ -43,18 +46,18 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er
buf := dbuf[:length]

for i := 0; len(buf) > 0; i++ {
reclen, ok := direntReclen(buf)
reclen, ok := dirent.DirentReclen(buf)
if !ok || reclen > uint64(len(buf)) {
break
}
rec := buf[:reclen]
buf = buf[reclen:]
typ := direntType(rec)
typ := dirent.DirentType(rec)
if skipFiles && typ.IsRegular() {
continue
}
const namoff = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))
namlen, ok := direntNamlen(rec)
namlen, ok := dirent.DirentNamlen(rec)
if !ok || namoff+namlen > uint64(len(rec)) {
break
}
Expand All @@ -80,50 +83,3 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er

return nil
}

// readInt returns the size-byte unsigned integer stored in little-endian
// byte order at offset off of b.
//
// The boolean result is false, and the integer 0, when size is 0 or larger
// than 8 (it would not fit in a uint64) or when b does not hold size bytes
// starting at off. Earlier versions ignored size and always decoded two
// bytes; two-byte reads behave exactly as before.
func readInt(b []byte, off, size uintptr) (uint64, bool) {
	n := uintptr(len(b))
	// Compare against n-off rather than computing off+size so that a huge
	// off or size cannot wrap around and slip past the bounds check.
	if size == 0 || size > 8 || off > n || size > n-off {
		return 0, false
	}
	p := b[off : off+size]
	var v uint64
	// Fold from the most significant (last) byte down to the least
	// significant (first) byte.
	for i := len(p) - 1; i >= 0; i-- {
		v = v<<8 | uint64(p[i])
	}
	return v, true
}

// Statically assert that Reclen and Namlen are exactly 2 bytes wide.
//
// Indexing a one-element array with the constant Sizeof(field)-2 compiles
// only when that constant is 0: a narrower field makes the uintptr constant
// underflow, and a wider field makes the constant index out of range. The
// previous form, ([2]int{})[Sizeof(field)-1], also accepted 1-byte fields.
var (
	_ = ([1]struct{}{})[unsafe.Sizeof(syscall.Dirent{}.Reclen)-2]
	_ = ([1]struct{}{})[unsafe.Sizeof(syscall.Dirent{}.Namlen)-2]
)

// direntReclen reads the Reclen field of the raw directory entry at the
// start of buf by handing the field's offset and size to readInt. Both
// results are passed through from readInt unchanged.
func direntReclen(buf []byte) (uint64, bool) {
	var d syscall.Dirent
	off, size := unsafe.Offsetof(d.Reclen), unsafe.Sizeof(d.Reclen)
	return readInt(buf, off, size)
}

func direntNamlen(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen))
}

// direntType translates the Type byte of the raw directory entry at the
// start of buf into os.FileMode type bits. A regular file yields 0.
//
// It returns ^os.FileMode(0) as an "unknown" marker when buf is too short to
// contain the Type byte or when the byte is not one of the DT_* values
// handled below.
func direntType(buf []byte) os.FileMode {
	const unknown = ^os.FileMode(0)

	typeOff := unsafe.Offsetof(syscall.Dirent{}.Type)
	if uintptr(len(buf)) <= typeOff {
		return unknown
	}

	mode := unknown
	switch buf[typeOff] {
	case syscall.DT_REG:
		mode = 0
	case syscall.DT_DIR:
		mode = os.ModeDir
	case syscall.DT_LNK:
		mode = os.ModeSymlink
	case syscall.DT_FIFO:
		mode = os.ModeNamedPipe
	case syscall.DT_SOCK:
		mode = os.ModeSocket
	case syscall.DT_BLK:
		mode = os.ModeDevice
	case syscall.DT_CHR:
		mode = os.ModeDevice | os.ModeCharDevice
	}
	return mode
}
93 changes: 50 additions & 43 deletions fastwalk_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,22 @@ package fastwalk
import (
"io/fs"
"os"
"sync"
"syscall"
"unsafe"

"github.com/charlievieth/fastwalk/internal/dirent"
)

// More than 5760 to work around https://golang.org/issue/24015.
const blockSize = 8192
// Empirical testing shows that 32k is the ideal buffer size.
const direntBufSize = 32 * 1024

// unknownFileMode is a sentinel (and bogus) os.FileMode
// value used to represent a syscall.DT_UNKNOWN Dirent.Type.
const unknownFileMode os.FileMode = os.ModeNamedPipe | os.ModeSocket | os.ModeDevice
// direntBufPool hands out pointers to byte slices of length direntBufSize.
// readDir takes one with Get, asserts it to *[]byte, and returns it with Put
// when it is done, so the buffers are reused across directory reads.
var direntBufPool = sync.Pool{
	New: func() interface{} {
		p := new([]byte)
		*p = make([]byte, direntBufSize)
		return p
	},
}

func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) error) error {
fd, err := open(dirName, 0, 0)
Expand All @@ -28,52 +33,54 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er
}
defer syscall.Close(fd)

// The buffer must be at least a block long.
buf := make([]byte, blockSize) // stack-allocated; doesn't escape
bufp := 0 // starting read position in buf
nbuf := 0 // end valid data in buf
pb := direntBufPool.Get().(*[]byte)
defer direntBufPool.Put(pb)
bbuf := *pb

skipFiles := false
for {
if bufp >= nbuf {
bufp = 0
nbuf, err = readDirent(fd, buf)
if err != nil {
return os.NewSyscallError("readdirent", err)
}
if nbuf <= 0 {
return nil
}
n, err := readDirent(fd, bbuf)
if err != nil {
return err
}
consumed, name, typ := dirent.Parse(buf[bufp:nbuf])
bufp += consumed

if name == "" || name == "." || name == ".." {
continue
if n <= 0 {
return nil
}
// Fallback for filesystems (like old XFS) that don't
// support Dirent.Type and have DT_UNKNOWN (0) there
// instead.
if typ == unknownFileMode {
fi, err := os.Lstat(dirName + "/" + name)
if err != nil {
// It got deleted in the meantime.
if os.IsNotExist(err) {
continue
buf := bbuf[:n:n]

for len(buf) > 0 {
reclen, ok := dirent.DirentReclen(buf)
if !ok || reclen > uint64(len(buf)) {
return nil
}
rec := buf[:reclen]
buf = buf[reclen:]
typ := dirent.DirentType(rec)
if skipFiles && typ.IsRegular() {
continue
}
const namoff = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))
namlen, ok := dirent.DirentNamlen(rec)
if !ok || namoff+namlen > uint64(len(rec)) {
break
}
name := rec[namoff : namoff+namlen]
for i, c := range name {
if c == 0 {
name = name[:i]
break
}
return err
}
typ = fi.Mode() & os.ModeType
}
if skipFiles && typ.IsRegular() {
continue
}
de := newUnixDirent(dirName, name, typ)
if err := fn(dirName, name, de); err != nil {
if err == ErrSkipFiles {
skipFiles = true
if string(name) == "." || string(name) == ".." {
continue
}
return err
nm := string(name)
if err := fn(dirName, nm, newUnixDirent(dirName, nm, typ)); err != nil {
if err != ErrSkipFiles {
return err
}
skipFiles = true
}
}
}
}
Expand Down
59 changes: 1 addition & 58 deletions internal/dirent/dirent.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,7 @@
//go:build aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris
//go:build aix || darwin || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris

package dirent

import (
"os"
"runtime"
"syscall"
"unsafe"
)

// readInt returns the size-bytes unsigned integer in native byte order at offset off.
func readInt(b []byte, off, size uintptr) (u uint64, ok bool) {
if len(b) < int(off+size) {
Expand Down Expand Up @@ -57,53 +50,3 @@ func readIntLE(b []byte, size uintptr) uint64 {
panic("syscall: readInt with unsupported size")
}
}

// InvalidMode is the os.FileMode value with all 32 bits set. Parse returns
// it as typ, together with an empty name, whenever a directory entry cannot
// be decoded or is skipped.
const InvalidMode = os.FileMode(1<<32 - 1)

// Parse decodes the directory entry at the start of buf.
//
// consumed is the record length of that entry, i.e. how far the caller must
// advance in buf to reach the next one. name is the entry's file name with
// anything from the first NUL byte onward removed, and typ is the file type
// reported by direntType.
//
// If the record length cannot be read or exceeds len(buf), Parse returns
// (0, "", InvalidMode); callers must stop on a zero consumed value or they
// will loop forever. For any other undecodable or skipped entry (unreadable
// or zero inode, bad name length) it returns (consumed, "", InvalidMode).
func Parse(buf []byte) (consumed int, name string, typ os.FileMode) {
	recLen, ok := direntReclen(buf)
	if !ok || uint64(len(buf)) < recLen {
		// WARN: this is a hard error because we consumed 0 bytes
		// and not stopping here could lead to an infinite loop.
		return 0, "", InvalidMode
	}
	rec := buf[:recLen]
	consumed = len(rec)

	// When building to wasip1, the host runtime might be running on Windows
	// or might expose a remote file system which does not have the concept
	// of inodes. Therefore, we cannot make the assumption that it is safe
	// to skip entries with zero inodes.
	ino, ok := direntIno(rec)
	if !ok || (ino == 0 && runtime.GOOS != "wasip1") {
		return consumed, "", InvalidMode
	}

	typ = direntType(buf)

	const nameOff = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))
	nameLen, ok := direntNamlen(rec)
	if !ok || nameOff+nameLen > uint64(len(rec)) {
		return consumed, "", InvalidMode
	}

	// Trim the name at its first NUL byte, if there is one.
	raw := rec[nameOff : nameOff+nameLen]
	for i := 0; i < len(raw); i++ {
		if raw[i] == 0 {
			raw = raw[:i]
			break
		}
	}

	// Check for useless names before allocating a string.
	switch string(raw) {
	case ".":
		return consumed, ".", typ
	case "..":
		return consumed, "..", typ
	}
	return consumed, string(raw), typ
}
10 changes: 5 additions & 5 deletions internal/dirent/dirent_aix.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,22 @@ import (
"unsafe"
)

func direntIno(buf []byte) (uint64, bool) {
// DirentIno reads the Ino field of the raw directory entry at the start of
// buf. Both results are passed through from readInt unchanged.
func DirentIno(buf []byte) (uint64, bool) {
	var d syscall.Dirent
	off, size := unsafe.Offsetof(d.Ino), unsafe.Sizeof(d.Ino)
	return readInt(buf, off, size)
}

func direntReclen(buf []byte) (uint64, bool) {
// DirentReclen reads the Reclen field of the raw directory entry at the
// start of buf. Both results are passed through from readInt unchanged.
func DirentReclen(buf []byte) (uint64, bool) {
	var d syscall.Dirent
	off, size := unsafe.Offsetof(d.Reclen), unsafe.Sizeof(d.Reclen)
	return readInt(buf, off, size)
}

func direntNamlen(buf []byte) (uint64, bool) {
reclen, ok := direntReclen(buf)
// DirentNamlen derives the name length of the raw directory entry at the
// start of buf as its record length minus the offset of the Name field.
// It returns (0, false) when DirentReclen cannot read the record length.
func DirentNamlen(buf []byte) (uint64, bool) {
	const nameOff = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))
	if reclen, ok := DirentReclen(buf); ok {
		return reclen - nameOff, true
	}
	return 0, false
}

func direntType(buf []byte) os.FileMode {
// DirentType always reports the "unknown" marker ^os.FileMode(0); buf is
// not inspected.
func DirentType(buf []byte) os.FileMode {
	const unknown = ^os.FileMode(0)
	return unknown
}
46 changes: 46 additions & 0 deletions internal/dirent/dirent_darwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//go:build darwin

package dirent

import (
"os"
"syscall"
"unsafe"
)

// DirentIno reads the Ino field of the raw directory entry at the start of
// buf. Both results are passed through from readInt unchanged.
func DirentIno(buf []byte) (uint64, bool) {
	var d syscall.Dirent
	off, size := unsafe.Offsetof(d.Ino), unsafe.Sizeof(d.Ino)
	return readInt(buf, off, size)
}

// DirentReclen reads the Reclen field of the raw directory entry at the
// start of buf. Both results are passed through from readInt unchanged.
func DirentReclen(buf []byte) (uint64, bool) {
	var d syscall.Dirent
	off, size := unsafe.Offsetof(d.Reclen), unsafe.Sizeof(d.Reclen)
	return readInt(buf, off, size)
}

func DirentNamlen(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen))
}

// DirentType translates the Type byte of the raw directory entry at the
// start of buf into os.FileMode type bits. A regular file yields 0.
//
// It returns ^os.FileMode(0) as an "unknown" marker when buf is too short to
// contain the Type byte or when the byte is not one of the DT_* values
// handled below.
func DirentType(buf []byte) os.FileMode {
	const unknown = ^os.FileMode(0)

	typeOff := unsafe.Offsetof(syscall.Dirent{}.Type)
	if uintptr(len(buf)) <= typeOff {
		return unknown
	}

	mode := unknown
	switch buf[typeOff] {
	case syscall.DT_REG:
		mode = 0
	case syscall.DT_DIR:
		mode = os.ModeDir
	case syscall.DT_LNK:
		mode = os.ModeSymlink
	case syscall.DT_FIFO:
		mode = os.ModeNamedPipe
	case syscall.DT_SOCK:
		mode = os.ModeSocket
	case syscall.DT_BLK:
		mode = os.ModeDevice
	case syscall.DT_CHR:
		mode = os.ModeDevice | os.ModeCharDevice
	}
	return mode
}
10 changes: 5 additions & 5 deletions internal/dirent/dirent_dragonfly.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@ import (
"unsafe"
)

func direntIno(buf []byte) (uint64, bool) {
func DirentIno(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Fileno), unsafe.Sizeof(syscall.Dirent{}.Fileno))
}

func direntReclen(buf []byte) (uint64, bool) {
namlen, ok := direntNamlen(buf)
// DirentReclen computes the record length of the raw directory entry at the
// start of buf from its name length: 16 + namlen + 1, rounded up to the next
// multiple of 8. It returns (0, false) when DirentNamlen cannot read the
// name length.
//
// NOTE(review): the 16 is presumably the size of the fixed fields preceding
// the name and the +1 its NUL terminator — confirm against the platform's
// dirent layout.
func DirentReclen(buf []byte) (uint64, bool) {
	const alignMask = 7 // round up to a multiple of 8

	namlen, ok := DirentNamlen(buf)
	if ok {
		return (16 + namlen + 1 + alignMask) &^ alignMask, true
	}
	return 0, false
}

func direntNamlen(buf []byte) (uint64, bool) {
func DirentNamlen(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen))
}

func direntType(buf []byte) os.FileMode {
func DirentType(buf []byte) os.FileMode {
off := unsafe.Offsetof(syscall.Dirent{}.Type)
if off >= uintptr(len(buf)) {
return ^os.FileMode(0) // unknown
Expand Down
Loading
Loading