Skip to content

Commit

Permalink
Merge pull request #105 from vmarkovtsev/master
Browse files Browse the repository at this point in the history
Add filtering changes by language
  • Loading branch information
vmarkovtsev authored Sep 17, 2018
2 parents 75e4d76 + e70948c commit 9b8478d
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 65 deletions.
97 changes: 84 additions & 13 deletions internal/plumbing/tree_diff.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package plumbing

import (
"fmt"
"gopkg.in/src-d/enry.v1"
"io"
"log"
"strings"
Expand All @@ -18,8 +20,11 @@ import (
type TreeDiff struct {
// NOTE(review): embedded from the core package; presumably supplies a no-op merge
// implementation for this pipeline item - confirm against core.NoopMerger.
core.NoopMerger
// SkipDirs lists path prefixes. Consume() drops every change whose old or new name
// starts with one of them.
SkipDirs []string
// Languages is the set of language names which pass the language filter. The special
// "all" key disables the filter and lets every file through.
Languages map[string]bool

// previousTree is the tree stored by Consume() for the next call. Initialize() resets it to nil.
previousTree *object.Tree
// previousCommit is the hash of the commit stored by Consume() together with previousTree.
previousCommit plumbing.Hash
// repository is the repository supplied to Initialize(); checkLanguage() loads blobs from it.
repository *git.Repository
}

const (
Expand All @@ -31,6 +36,13 @@ const (
// ConfigTreeDiffBlacklistedDirs is the name of the configuration option
// (TreeDiff.Configure()) which allows to set blacklisted directories.
ConfigTreeDiffBlacklistedDirs = "TreeDiff.BlacklistedDirs"
// ConfigTreeDiffLanguages is the name of the configuration option (TreeDiff.Configure())
// which sets the list of programming languages to analyze. Language names are at
// https://doc.bblf.sh/languages.html Names are joined with a comma ",".
// "all" is the special name which disables this filter.
ConfigTreeDiffLanguages = "TreeDiff.Languages"
// allLanguages denotes passing all files in.
allLanguages = "all"
)

var defaultBlacklistedDirs = []string{"vendor/", "vendors/", "node_modules/"}
Expand Down Expand Up @@ -67,7 +79,15 @@ func (treediff *TreeDiff) ListConfigurationOptions() []core.ConfigurationOption
Description: "List of blacklisted directories. Separated by comma \",\".",
Flag: "blacklisted-dirs",
Type: core.StringsConfigurationOption,
Default: defaultBlacklistedDirs},
Default: defaultBlacklistedDirs}, {
Name: ConfigTreeDiffLanguages,
Description: fmt.Sprintf(
"List of programming languages to analyze. Separated by comma \",\". " +
"Names are at https://doc.bblf.sh/languages.html \"%s\" is the special name " +
"which disables this filter and lets all the files through.", allLanguages),
Flag: "languages",
Type: core.StringsConfigurationOption,
Default: []string{allLanguages}},
}
return options[:]
}
Expand All @@ -77,12 +97,26 @@ func (treediff *TreeDiff) Configure(facts map[string]interface{}) {
if val, exist := facts[ConfigTreeDiffEnableBlacklist]; exist && val.(bool) {
treediff.SkipDirs = facts[ConfigTreeDiffBlacklistedDirs].([]string)
}
if val, exists := facts[ConfigTreeDiffLanguages].(string); exists {
treediff.Languages = map[string]bool{}
for _, lang := range strings.Split(val, ",") {
treediff.Languages[strings.TrimSpace(lang)] = true
}
} else if treediff.Languages == nil {
treediff.Languages = map[string]bool{}
treediff.Languages[allLanguages] = true
}
}

// Initialize prepares this PipelineItem for a fresh series of Consume() calls: it forgets
// the previously stored tree and remembers the repository which is going to be analysed.
// If no language set has been assigned yet, the filter defaults to letting all files through.
func (treediff *TreeDiff) Initialize(repository *git.Repository) {
	if treediff.Languages == nil {
		treediff.Languages = map[string]bool{allLanguages: true}
	}
	treediff.repository = repository
	treediff.previousTree = nil
}

// Consume runs this PipelineItem on the next commit data.
Expand Down Expand Up @@ -124,6 +158,13 @@ func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]inter
}
return err
}
pass, err := treediff.checkLanguage(file.Name, file.Hash)
if err != nil {
return err
}
if !pass {
continue
}
diff = append(diff, &object.Change{
To: object.ChangeEntry{Name: file.Name, Tree: tree, TreeEntry: object.TreeEntry{
Name: file.Name, Mode: file.Mode, Hash: file.Hash}}})
Expand All @@ -137,21 +178,29 @@ func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]inter
treediff.previousTree = tree
treediff.previousCommit = commit.Hash

if len(treediff.SkipDirs) > 0 {
// filter without allocation
filteredDiff := make([]*object.Change, 0, len(diff))
OUTER:
for _, change := range diff {
for _, dir := range treediff.SkipDirs {
if strings.HasPrefix(change.To.Name, dir) || strings.HasPrefix(change.From.Name, dir) {
continue OUTER
}
// filter without allocation
filteredDiff := make([]*object.Change, 0, len(diff))
OUTER:
for _, change := range diff {
for _, dir := range treediff.SkipDirs {
if strings.HasPrefix(change.To.Name, dir) || strings.HasPrefix(change.From.Name, dir) {
continue OUTER
}
filteredDiff = append(filteredDiff, change)
}

diff = filteredDiff
var changeEntry object.ChangeEntry
if change.To.Tree == nil {
changeEntry = change.From
} else {
changeEntry = change.To
}
pass, _ := treediff.checkLanguage(changeEntry.Name, changeEntry.TreeEntry.Hash)
if !pass {
continue
}
filteredDiff = append(filteredDiff, change)
}

diff = filteredDiff
return map[string]interface{}{DependencyTreeChanges: diff}, nil
}

Expand All @@ -160,6 +209,28 @@ func (treediff *TreeDiff) Fork(n int) []core.PipelineItem {
return core.ForkCopyPipelineItem(treediff, n)
}

// checkLanguage reports whether the blob's detected language is one of the languages
// enabled in treediff.Languages.
//
// name is the path of the file and blobHash identifies its contents in the repository
// which was passed to Initialize(). When the special "all" entry is enabled, every file
// passes and the blob is never loaded. Otherwise up to 1024 leading bytes of the blob are
// handed to enry.GetLanguage() together with the name, and the detected language is looked
// up in the set.
//
// An error is returned if the blob cannot be loaded or read. An empty blob, or one shorter
// than the sniffing buffer, is not an error.
func (treediff *TreeDiff) checkLanguage(name string, blobHash plumbing.Hash) (bool, error) {
	if treediff.Languages[allLanguages] {
		return true, nil
	}
	blob, err := treediff.repository.BlobObject(blobHash)
	if err != nil {
		return false, err
	}
	reader, err := blob.Reader()
	if err != nil {
		return false, err
	}
	// The reader is only read from, so a failure to close it cannot lose any data.
	defer reader.Close()
	buffer := make([]byte, 1024)
	n, err := io.ReadFull(reader, buffer)
	// io.EOF means the blob is empty and io.ErrUnexpectedEOF means it is shorter than the
	// buffer; both are perfectly valid files and must not abort the analysis.
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return false, err
	}
	// Pass only the bytes which were actually read, otherwise the zero padding would be
	// treated as file contents by the language detector.
	lang := enry.GetLanguage(name, buffer[:n])
	return treediff.Languages[lang], nil
}

// init registers an empty TreeDiff with core.Registry when the package is loaded.
func init() {
	core.Registry.Register(new(TreeDiff))
}
44 changes: 43 additions & 1 deletion internal/plumbing/tree_diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func TestTreeDiffMeta(t *testing.T) {
assert.Equal(t, len(td.Provides()), 1)
assert.Equal(t, td.Provides()[0], DependencyTreeChanges)
opts := td.ListConfigurationOptions()
assert.Len(t, opts, 2)
assert.Len(t, opts, 3)
}

func TestTreeDiffRegistration(t *testing.T) {
Expand Down Expand Up @@ -115,6 +115,7 @@ func TestTreeDiffBadCommit(t *testing.T) {
func TestTreeDiffConsumeSkip(t *testing.T) {
// consume without skipping
td := fixtureTreeDiff()
assert.Contains(t, td.Languages, allLanguages)
commit, _ := test.Repository.CommitObject(plumbing.NewHash(
"aefdedf7cafa6ee110bae9a3910bf5088fdeb5a9"))
deps := map[string]interface{}{}
Expand Down Expand Up @@ -142,6 +143,47 @@ func TestTreeDiffConsumeSkip(t *testing.T) {
assert.Equal(t, 31, len(changes))
}

// TestTreeDiffConsumeLanguageFilterFirst configures the language filter to "Go" and checks
// that the first Consume() call on a fresh fixture reports only the expected Go files.
func TestTreeDiffConsumeLanguageFilterFirst(t *testing.T) {
	td := fixtureTreeDiff()
	td.Configure(map[string]interface{}{ConfigTreeDiffLanguages: "Go"})
	commit, err := test.Repository.CommitObject(plumbing.NewHash(
		"fbe766ffdc3f87f6affddc051c6f8b419beea6a2"))
	if !assert.Nil(t, err) {
		return
	}
	deps := map[string]interface{}{core.DependencyCommit: commit}
	res, err := td.Consume(deps)
	assert.Nil(t, err)
	assert.Len(t, res, 1)
	changes, ok := res[DependencyTreeChanges].(object.Changes)
	if !assert.True(t, ok) {
		return
	}
	expected := []string{
		"analyser.go",
		"cmd/hercules/main.go",
		"doc.go",
		"file.go",
		"file_test.go",
		"rbtree.go",
	}
	// Stop here on a length mismatch: indexing changes below would panic instead of
	// producing a readable test failure.
	if !assert.Len(t, changes, len(expected)) {
		return
	}
	for i, name := range expected {
		assert.Equal(t, name, changes[i].To.Name)
	}
}

// TestTreeDiffConsumeLanguageFilter configures the language filter to "Python", consumes
// two commits in a row and checks that the second Consume() call reports the single
// expected Python file.
func TestTreeDiffConsumeLanguageFilter(t *testing.T) {
	td := fixtureTreeDiff()
	td.Configure(map[string]interface{}{ConfigTreeDiffLanguages: "Python"})
	commit, err := test.Repository.CommitObject(plumbing.NewHash(
		"e89c1d10fb31e32668ad905eb59dc44d7a4a021e"))
	if !assert.Nil(t, err) {
		return
	}
	deps := map[string]interface{}{core.DependencyCommit: commit}
	res, err := td.Consume(deps)
	assert.Nil(t, err)
	assert.Len(t, res, 1)
	commit, err = test.Repository.CommitObject(plumbing.NewHash(
		"fbe766ffdc3f87f6affddc051c6f8b419beea6a2"))
	if !assert.Nil(t, err) {
		return
	}
	deps[core.DependencyCommit] = commit
	res, err = td.Consume(deps)
	assert.Nil(t, err)
	assert.Len(t, res, 1)
	changes, ok := res[DependencyTreeChanges].(object.Changes)
	if !assert.True(t, ok) {
		return
	}
	// Stop here on a length mismatch: indexing changes below would panic instead of
	// producing a readable test failure.
	if !assert.Len(t, changes, 1) {
		return
	}
	assert.Equal(t, "labours.py", changes[0].To.Name)
}

func TestTreeDiffFork(t *testing.T) {
td1 := fixtureTreeDiff()
td1.SkipDirs = append(td1.SkipDirs, "skip")
Expand Down
29 changes: 1 addition & 28 deletions internal/plumbing/uast/uast.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"gopkg.in/bblfsh/client-go.v2"
"gopkg.in/bblfsh/sdk.v1/protocol"
"gopkg.in/bblfsh/sdk.v1/uast"
"gopkg.in/src-d/enry.v1"
"gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
Expand All @@ -37,7 +36,6 @@ type Extractor struct {
Endpoint string
Context func() (context.Context, context.CancelFunc)
PoolSize int
Languages map[string]bool
FailOnErrors bool
ProcessedFiles map[string]int

Expand All @@ -60,11 +58,6 @@ const (
// ConfigUASTFailOnErrors is the name of the configuration option (Extractor.Configure())
// which enables early exit in case of any Babelfish UAST parsing errors.
ConfigUASTFailOnErrors = "ConfigUASTFailOnErrors"
// ConfigUASTLanguages is the name of the configuration option (Extractor.Configure())
// which sets the list of languages to parse. Language names are at
// https://doc.bblf.sh/languages.html Names are joined with a comma ",".
ConfigUASTLanguages = "ConfigUASTLanguages"

// FeatureUast is the name of the Pipeline feature which activates all the items related to UAST.
FeatureUast = "uast"
// DependencyUasts is the name of the dependency provided by Extractor.
Expand Down Expand Up @@ -140,12 +133,7 @@ func (exr *Extractor) ListConfigurationOptions() []core.ConfigurationOption {
Description: "Panic if there is a UAST extraction error.",
Flag: "bblfsh-fail-on-error",
Type: core.BoolConfigurationOption,
Default: false}, {
Name: ConfigUASTLanguages,
Description: "Programming languages from which to extract UASTs. Separated by comma \",\".",
Flag: "languages",
Type: core.StringConfigurationOption,
Default: "Python,Java,Go,JavaScript,Ruby,PHP"},
Default: false},
}
return options[:]
}
Expand All @@ -164,12 +152,6 @@ func (exr *Extractor) Configure(facts map[string]interface{}) {
if val, exists := facts[ConfigUASTPoolSize].(int); exists {
exr.PoolSize = val
}
if val, exists := facts[ConfigUASTLanguages].(string); exists {
exr.Languages = map[string]bool{}
for _, lang := range strings.Split(val, ",") {
exr.Languages[strings.TrimSpace(lang)] = true
}
}
if val, exists := facts[ConfigUASTFailOnErrors].(bool); exists {
exr.FailOnErrors = val
}
Expand Down Expand Up @@ -210,9 +192,6 @@ func (exr *Extractor) Initialize(repository *git.Repository) {
panic("UAST goroutine pool was not created")
}
exr.ProcessedFiles = map[string]int{}
if exr.Languages == nil {
exr.Languages = map[string]bool{}
}
}

// Consume runs this PipelineItem on the next commit data.
Expand All @@ -235,17 +214,11 @@ func (exr *Extractor) Consume(deps map[string]interface{}) (map[string]interface
return
}
defer ioutil.CheckClose(reader, &err)

buf := new(bytes.Buffer)
if _, err := buf.ReadFrom(reader); err != nil {
errs = append(errs, err)
return
}
lang := enry.GetLanguage(change.To.Name, buf.Bytes())
if _, exists := exr.Languages[lang]; !exists {
exr.ProcessedFiles[change.To.Name] = uastExtractionSkipped
return
}
exr.ProcessedFiles[change.To.Name]++
}
wg.Add(1)
Expand Down
Loading

0 comments on commit 9b8478d

Please sign in to comment.