From ba624ebef29efc1fbfc8f85201e0b330833fa021 Mon Sep 17 00:00:00 2001 From: Piotr Rojek Date: Thu, 25 Feb 2016 21:58:10 -0800 Subject: [PATCH 1/7] Write tests for SkipDir --- walker_test.go | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/walker_test.go b/walker_test.go index 8a6ebea..c4e78b1 100644 --- a/walker_test.go +++ b/walker_test.go @@ -278,5 +278,44 @@ func TestPowerWalkError(t *testing.T) { assert.True(t, v, k) } } +} + +func TestSkipDir(t *testing.T) { + + // max concurrency out + runtime.GOMAXPROCS(runtime.NumCPU()) + + dirToSkip := fmt.Sprintf("%s/dirToSkip", testFiles) + + // this time, let's make test dirs + one additional directory to skip + makeTestFiles(5, 10) + if err := os.MkdirAll(dirToSkip, 0777); err != nil { + panic(fmt.Sprintf("%s", err)) + } + if err := ioutil.WriteFile(fmt.Sprintf("%s/browserHistory.txt", dirToSkip), []byte("redtube.com"), 0777); err != nil { + panic(fmt.Sprintf("%s", err)) + } + defer deleteTestFiles() + + // declare directories to skip + //SkipDir(fmt.Sprintf("%s/dirToSkip", testFiles)) + + var seenLock sync.Mutex + seen := make(map[string]bool) + walkFunc := func(p string, info os.FileInfo, err error) error { + if !info.IsDir() { + filename := path.Base(p) + seenLock.Lock() + defer seenLock.Unlock() + seen[filename] = true + } + return nil + } + + assert.NoError(t, Walk(testFiles, walkFunc)) + + // check if file inside "dirToSkip" was ommited + assert.False(t, seen["browserHistory.txt"]) + //log.Println(seen) } From bdf0fd6f6f05b17e7be04f9d8b63d51a4e8c5f83 Mon Sep 17 00:00:00 2001 From: Piotr Rojek Date: Thu, 25 Feb 2016 21:59:16 -0800 Subject: [PATCH 2/7] Update README GOMAXPROCS description --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 77dfa2d..8461610 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,14 @@ Go package for walking files and concurrently calling user code to handle each file. 
This package walks the file system in the same way `filepath.Walk` does, except instead of calling the `walkFn` inline, it uses goroutines to allow the files to be handled concurrently. -Powerwalk functions by walking concurrently over many files. In order to realize any benefits from this approach, you must tell the runtime to use multiple CPUs. For example: +Powerwalk functions by walking concurrently over many files. When using a Go version lower than 1.5, in order to realize any benefits from this approach, you must tell the runtime to use multiple CPUs. For example: ``` runtime.GOMAXPROCS(runtime.NumCPU()) ``` +In Go 1.5 and above this isn't needed, as `GOMAXPROCS` defaults to the number of available CPUs. + ## Usage Powerwalk is a drop-in replacement for the `filepath.Walk` method ([read about that for more details](http://golang.org/pkg/path/filepath/#Walk)), and so has the same signature, even using the `filepath.WalkFunc` too. @@ -25,4 +27,3 @@ powerwalk.WalkLimit(root string, walkFn filepath.WalkFunc, limit int) error The `WalkLimit` function does the same as `Walk`, except allows you to specify the number of files to concurrently walk using the `limit` argument. The `limit` argument must be one or higher (i.e. `>0`). Specificying a limit that's too high, causes unnecessary overhead so sensible numbers are encouraged but not enforced. See the [godoc documentation](http://godoc.org/github.com/stretchr/powerwalk) for more information. 
- From 02ca0a1e6f00af7bed5196c880e97994871f217b Mon Sep 17 00:00:00 2001 From: Piotr Rojek Date: Thu, 25 Feb 2016 22:09:23 -0800 Subject: [PATCH 3/7] Add section for SkipDir func in README --- README.md | 8 ++++++++ walker_test.go | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8461610..7f0e5c0 100644 --- a/README.md +++ b/README.md @@ -26,4 +26,12 @@ powerwalk.WalkLimit(root string, walkFn filepath.WalkFunc, limit int) error The `WalkLimit` function does the same as `Walk`, except allows you to specify the number of files to concurrently walk using the `limit` argument. The `limit` argument must be one or higher (i.e. `>0`). Specificying a limit that's too high, causes unnecessary overhead so sensible numbers are encouraged but not enforced. +### Omitting directories + +In order to skip some nested directories, please call `SkipDir` function with paths to directories to be omitted: + +``` +powerwalk.SkipDir(dir...string) +``` + See the [godoc documentation](http://godoc.org/github.com/stretchr/powerwalk) for more information. 
diff --git a/walker_test.go b/walker_test.go index c4e78b1..cfb484b 100644 --- a/walker_test.go +++ b/walker_test.go @@ -314,7 +314,7 @@ func TestSkipDir(t *testing.T) { assert.NoError(t, Walk(testFiles, walkFunc)) - // check if file inside "dirToSkip" was ommited + // check if file inside "dirToSkip" was omitted assert.False(t, seen["browserHistory.txt"]) //log.Println(seen) From d39db92793102f1e2971c3ca662f001fbc06b581 Mon Sep 17 00:00:00 2001 From: Piotr Rojek Date: Thu, 25 Feb 2016 22:33:56 -0800 Subject: [PATCH 4/7] Implement SkipDir func --- walker.go | 12 ++++++++++++ walker_test.go | 6 ++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/walker.go b/walker.go index cc6fd14..9952369 100644 --- a/walker.go +++ b/walker.go @@ -4,6 +4,7 @@ import ( "errors" "os" "path/filepath" + "strings" "sync" ) @@ -12,6 +13,12 @@ import ( // To use a value other than this one, use the WalkLimit function. const DefaultConcurrentWalks int = 100 +var dirsToSkip []string + +func SkipDir(dirs ...string) { + dirsToSkip = dirs +} + // Walk walks the file tree rooted at root, calling walkFn for each file or // directory in the tree, including root. All errors that arise visiting files // and directories are filtered by walkFn. The output is non-deterministic. 
@@ -92,6 +99,11 @@ func WalkLimit(root string, walkFn filepath.WalkFunc, limit int) error { close(files) return errors.New("kill received while walking") default: + for _, d := range dirsToSkip { + if strings.Contains(p, d) { + return nil + } + } filesWg.Add(1) select { case files <- &walkArgs{path: p, info: info, err: err}: diff --git a/walker_test.go b/walker_test.go index cfb484b..c7ccdd0 100644 --- a/walker_test.go +++ b/walker_test.go @@ -285,7 +285,7 @@ func TestSkipDir(t *testing.T) { // max concurrency out runtime.GOMAXPROCS(runtime.NumCPU()) - dirToSkip := fmt.Sprintf("%s/dirToSkip", testFiles) + dirToSkip := "test_files/dirToSkip" // this time, let's make test dirs + one additional directory to skip makeTestFiles(5, 10) @@ -298,7 +298,7 @@ func TestSkipDir(t *testing.T) { defer deleteTestFiles() // declare directories to skip - //SkipDir(fmt.Sprintf("%s/dirToSkip", testFiles)) + SkipDir(dirToSkip) var seenLock sync.Mutex seen := make(map[string]bool) @@ -316,6 +316,4 @@ func TestSkipDir(t *testing.T) { // check if file inside "dirToSkip" was omitted assert.False(t, seen["browserHistory.txt"]) - - //log.Println(seen) } From 6794b7ae0150fd295c66f00aa1fc79a38ba44037 Mon Sep 17 00:00:00 2001 From: Piotr Rojek Date: Thu, 25 Feb 2016 22:35:26 -0800 Subject: [PATCH 5/7] Add comment to exported func --- walker.go | 1 + 1 file changed, 1 insertion(+) diff --git a/walker.go b/walker.go index 9952369..1d5df62 100644 --- a/walker.go +++ b/walker.go @@ -15,6 +15,7 @@ const DefaultConcurrentWalks int = 100 var dirsToSkip []string +// SkipDir takes variable number of arguments, which are paths not to be walked func SkipDir(dirs ...string) { dirsToSkip = dirs } From 07644b62b28f2721cc628c8eb91c2a008cea571e Mon Sep 17 00:00:00 2001 From: Piotr Rojek Date: Fri, 26 Feb 2016 00:38:43 -0800 Subject: [PATCH 6/7] Changed `Contains` to `HasPrefix` With that we should avoid edge cases failures. 
--- walker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/walker.go b/walker.go index 1d5df62..477f41d 100644 --- a/walker.go +++ b/walker.go @@ -101,7 +101,7 @@ func WalkLimit(root string, walkFn filepath.WalkFunc, limit int) error { return errors.New("kill received while walking") default: for _, d := range dirsToSkip { - if strings.Contains(p, d) { + if strings.HasPrefix(p, d) { return nil } } From d1b8ab0ff2142a9bffbe36be9d1d90f3b78516d4 Mon Sep 17 00:00:00 2001 From: Piotr Rojek Date: Fri, 26 Feb 2016 00:40:21 -0800 Subject: [PATCH 7/7] Typo in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7f0e5c0..a4213a1 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ The `WalkLimit` function does the same as `Walk`, except allows you to specify t In order to skip some nested directories, please call `SkipDir` function with paths to directories to be omitted: ``` -powerwalk.SkipDir(dir...string) +powerwalk.SkipDir(dir ...string) ``` See the [godoc documentation](http://godoc.org/github.com/stretchr/powerwalk) for more information.