Skip to content

Commit

Permalink
Merge pull request #223 from vmarkovtsev/master
Browse files Browse the repository at this point in the history
Extract LinesStats item and include it in --file-history
  • Loading branch information
vmarkovtsev authored Feb 28, 2019
2 parents 4982508 + b2dc585 commit 276e055
Show file tree
Hide file tree
Showing 9 changed files with 800 additions and 422 deletions.
184 changes: 97 additions & 87 deletions internal/pb/pb.pb.go

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion internal/pb/pb.proto
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ message ShotnessAnalysisResults {

// FileHistory carries the recorded history of one file.
message FileHistory {
// commits is a list of strings; presumably the hashes of the commits which touched the file - TODO confirm.
repeated string commits = 1;
// Previous form of the field below, which mapped to a plain int32 counter:
//map<int32, int32> changes_by_developer = 2;
// changes_by_developer maps an int32 key (presumably the developer index - TODO confirm)
// to the LineStats accumulated under that key.
map<int32, LineStats> changes_by_developer = 2;
}

message FileHistoryResultMessage {
Expand Down
120 changes: 88 additions & 32 deletions internal/pb/pb_pb2.py

Large diffs are not rendered by default.

161 changes: 161 additions & 0 deletions internal/plumbing/line_stats.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
package plumbing

import (
"unicode/utf8"

"github.com/sergi/go-diff/diffmatchpatch"
"gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/go-git.v4/utils/merkletrie"
"gopkg.in/src-d/hercules.v8/internal/core"
)

// LinesStatsCalculator measures line statistics for each text file in the commit.
// It has no fields of its own; the only member is the embedded core.NoopMerger.
type LinesStatsCalculator struct {
core.NoopMerger
}

// LineStats holds the numbers of inserted, deleted and changed lines.
// LinesStatsCalculator.Consume() produces one LineStats per changed file of a commit;
// other users of this type may aggregate the counters over a different scope.
type LineStats struct {
// Added is the number of added lines.
Added int
// Removed is the number of removed lines.
Removed int
// Changed is the number of changed lines.
Changed int
}

const (
// DependencyLineStats is the identifier of the data provided by LinesStatsCalculator - line
// statistics for each file in the commit. The value which LinesStatsCalculator.Consume()
// stores under this key is a map[object.ChangeEntry]LineStats.
DependencyLineStats = "line_stats"
)

// Name returns the identifier of this PipelineItem. It is unique per item type
// and serves as a mapping key, e.g. when the item is looked up in the registry.
func (lsc *LinesStatsCalculator) Name() string {
	const itemName = "LinesStats"
	return itemName
}

// Provides lists the names of the entities which this PipelineItem produces.
// Every produced entity is inserted into `deps` of the dependent Consume()-s
// according to this list; core.Registry also uses it to build the global map
// of providers. The only entity produced here is DependencyLineStats.
func (lsc *LinesStatsCalculator) Provides() []string {
	return []string{DependencyLineStats}
}

// Requires lists the names of the entities which this PipelineItem needs.
// Every requested entity is inserted into `deps` of Consume(); those entities
// are in turn listed by Provides() of the upstream items. The order is: tree
// changes, blob cache, file diffs.
func (lsc *LinesStatsCalculator) Requires() []string {
	return []string{
		DependencyTreeChanges,
		DependencyBlobCache,
		DependencyFileDiff,
	}
}

// ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
// LinesStatsCalculator exposes none, so the result is always nil.
func (lsc *LinesStatsCalculator) ListConfigurationOptions() []core.ConfigurationOption {
return nil
}

// Configure sets the properties previously published by ListConfigurationOptions().
// There are no such properties: `facts` is not read and the returned error is always nil.
func (lsc *LinesStatsCalculator) Configure(facts map[string]interface{}) error {
return nil
}

// Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
// calls. The repository which is going to be analysed is supplied as an argument.
// This item holds no caches: `repository` is not used and the returned error is always nil.
func (lsc *LinesStatsCalculator) Initialize(repository *git.Repository) error {
return nil
}

// Consume runs this PipelineItem on the next commit data.
// `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
// Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
// This function returns the mapping with analysis results. The keys must be the same as
// in Provides(). If there was an error, nil is returned.
//
// The only result, DependencyLineStats, is a map[object.ChangeEntry]LineStats:
//
//   - an inserted file is keyed by change.To and all of its lines count as Added;
//   - a deleted file is keyed by change.From and all of its lines count as Removed;
//   - a modified file is keyed by change.To and is counted from its DependencyFileDiff edits:
//     a deletion directly followed by an insertion counts as Changed for the overlapping
//     amount, the excess counts as Removed or Added.
//
// Inserted and deleted files for which CountLines() returns an error are left out of the map.
// Merge commits (core.DependencyIsMerge is true) yield an empty map.
// The only error returned is the one reported by change.Action().
func (lsc *LinesStatsCalculator) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
	result := map[object.ChangeEntry]LineStats{}
	if deps[core.DependencyIsMerge].(bool) {
		// we ignore merge commit diffs
		// TODO(vmarkovtsev): handle them better
		return map[string]interface{}{DependencyLineStats: result}, nil
	}
	treeDiff := deps[DependencyTreeChanges].(object.Changes)
	cache := deps[DependencyBlobCache].(map[plumbing.Hash]*CachedBlob)
	fileDiffs := deps[DependencyFileDiff].(map[string]FileDiffData)
	for _, change := range treeDiff {
		action, err := change.Action()
		if err != nil {
			return nil, err
		}
		switch action {
		case merkletrie.Insert:
			blob := cache[change.To.TreeEntry.Hash]
			lines, err := blob.CountLines()
			if err != nil {
				// binary
				continue
			}
			result[change.To] = LineStats{Added: lines}
		case merkletrie.Delete:
			blob := cache[change.From.TreeEntry.Hash]
			lines, err := blob.CountLines()
			if err != nil {
				// binary
				continue
			}
			result[change.From] = LineStats{Removed: lines}
		case merkletrie.Modify:
			// NOTE(review): when fileDiffs has no entry for this name, the lookup yields
			// the zero FileDiffData, the loop below does not run and an all-zero
			// LineStats is recorded for the file.
			thisDiffs := fileDiffs[change.To.Name]
			// removedPending is the size of the deletions seen since the last equal or
			// inserted span; it is not yet known whether they are pure removals or the
			// "before" part of a change.
			// NOTE(review): sizes are measured in runes of edit.Text; presumably each
			// rune encodes one line - confirm against the producer of DependencyFileDiff.
			var added, removed, changed, removedPending int
			for _, edit := range thisDiffs.Diffs {
				switch edit.Type {
				case diffmatchpatch.DiffEqual:
					// Nothing was inserted in place of the pending deletions.
					removed += removedPending
					removedPending = 0
				case diffmatchpatch.DiffInsert:
					delta := utf8.RuneCountInString(edit.Text)
					if removedPending > delta {
						changed += delta
						removed += removedPending - delta
					} else {
						changed += removedPending
						added += delta - removedPending
					}
					removedPending = 0
				case diffmatchpatch.DiffDelete:
					// Accumulate instead of overwriting so that back-to-back deletions
					// are not lost.
					removedPending += utf8.RuneCountInString(edit.Text)
				}
			}
			// Deletions at the very end of the file are pure removals.
			removed += removedPending
			result[change.To] = LineStats{
				Added:   added,
				Removed: removed,
				Changed: changed,
			}
		}
	}
	return map[string]interface{}{DependencyLineStats: result}, nil
}

// Fork clones this PipelineItem.
// The work is delegated to core.ForkSamePipelineItem, which receives this item and `n`.
func (lsc *LinesStatsCalculator) Fork(n int) []core.PipelineItem {
return core.ForkSamePipelineItem(lsc, n)
}

// init registers a LinesStatsCalculator in core.Registry when the package is loaded.
func init() {
core.Registry.Register(&LinesStatsCalculator{})
}
130 changes: 130 additions & 0 deletions internal/plumbing/line_stats_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
package plumbing_test

import (
"testing"

"github.com/stretchr/testify/assert"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/hercules.v8/internal/core"
items "gopkg.in/src-d/hercules.v8/internal/plumbing"
"gopkg.in/src-d/hercules.v8/internal/plumbing/identity"
"gopkg.in/src-d/hercules.v8/internal/test"
"gopkg.in/src-d/hercules.v8/internal/test/fixtures"
)

// TestLinesStatsMeta checks the static metadata of LinesStatsCalculator: its name,
// the provided and required dependencies, the absence of configuration options and
// that every fork equals the original item.
func TestLinesStatsMeta(t *testing.T) {
	ra := &items.LinesStatsCalculator{}
	// assert.Equal takes (t, expected, actual); keeping that order makes failure
	// messages label the values correctly.
	assert.Equal(t, "LinesStats", ra.Name())
	// Whole slices are compared so that a wrong length fails the assertion
	// instead of panicking on an out-of-range index.
	assert.Equal(t, []string{items.DependencyLineStats}, ra.Provides())
	assert.Equal(t, []string{
		items.DependencyTreeChanges,
		items.DependencyBlobCache,
		items.DependencyFileDiff,
	}, ra.Requires())
	assert.Nil(t, ra.ListConfigurationOptions())
	assert.NoError(t, ra.Configure(nil))
	for _, f := range ra.Fork(10) {
		assert.Equal(t, ra, f)
	}
}

// TestLinesStatsRegistration checks that LinesStatsCalculator can be summoned from
// core.Registry both by its own name and by the name of the entity it provides.
func TestLinesStatsRegistration(t *testing.T) {
	summoned := core.Registry.Summon((&items.LinesStatsCalculator{}).Name())
	// Only index the slice when the length assertion held; otherwise the test
	// would panic instead of reporting the failure.
	if assert.Len(t, summoned, 1) {
		assert.Equal(t, "LinesStats", summoned[0].Name())
	}
	summoned = core.Registry.Summon((&items.LinesStatsCalculator{}).Provides()[0])
	assert.NotEmpty(t, summoned)
	matched := false
	for _, tp := range summoned {
		if tp.Name() == "LinesStats" {
			matched = true
			break
		}
	}
	assert.True(t, matched)
}

// TestLinesStatsConsume feeds LinesStatsCalculator.Consume() with three changes from
// the test repository - one modification (analyser.go -> analyser2.go), one insertion
// (cmd/hercules/main.go) and one deletion (.travis.yml) - and checks the line
// statistics reported for each of them.
func TestLinesStatsConsume(t *testing.T) {
	deps := map[string]interface{}{}

	// Upstream dependencies: author, blob cache and tree changes.
	deps[identity.DependencyAuthor] = 0
	cache := map[plumbing.Hash]*items.CachedBlob{}
	items.AddHash(t, cache, "291286b4ac41952cbd1389fda66420ec03c1a9fe")
	items.AddHash(t, cache, "c29112dbd697ad9b401333b80c18a63951bc18d9")
	items.AddHash(t, cache, "baa64828831d174f40140e4b3cfa77d1e917a2c1")
	items.AddHash(t, cache, "dc248ba2b22048cc730c571a748e8ffcf7085ab9")
	deps[items.DependencyBlobCache] = cache
	changes := make(object.Changes, 3)
	treeFrom, err := test.Repository.TreeObject(plumbing.NewHash(
		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
	assert.NoError(t, err)
	treeTo, err := test.Repository.TreeObject(plumbing.NewHash(
		"994eac1cd07235bb9815e547a75c84265dea00f5"))
	assert.NoError(t, err)
	// Modification.
	changes[0] = &object.Change{From: object.ChangeEntry{
		Name: "analyser.go",
		Tree: treeFrom,
		TreeEntry: object.TreeEntry{
			Name: "analyser.go",
			Mode: 0100644,
			Hash: plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9"),
		},
	}, To: object.ChangeEntry{
		Name: "analyser2.go",
		Tree: treeTo,
		TreeEntry: object.TreeEntry{
			Name: "analyser2.go",
			Mode: 0100644,
			Hash: plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1"),
		},
	}}
	// Insertion: the "From" side is empty.
	changes[1] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
		Name: "cmd/hercules/main.go",
		Tree: treeTo,
		TreeEntry: object.TreeEntry{
			Name: "cmd/hercules/main.go",
			Mode: 0100644,
			Hash: plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9"),
		},
	},
	}
	// Deletion: the "To" side is empty.
	changes[2] = &object.Change{From: object.ChangeEntry{
		Name: ".travis.yml",
		Tree: treeTo,
		TreeEntry: object.TreeEntry{
			Name: ".travis.yml",
			Mode: 0100644,
			Hash: plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe"),
		},
	}, To: object.ChangeEntry{},
	}
	deps[items.DependencyTreeChanges] = changes

	// The file diffs come from the real FileDiff item run on the same deps.
	fd := fixtures.FileDiff()
	result, err := fd.Consume(deps)
	if !assert.NoError(t, err) {
		return
	}
	deps[items.DependencyFileDiff] = result[items.DependencyFileDiff]
	commit, err := test.Repository.CommitObject(plumbing.NewHash(
		"cce947b98a050c6d356bc6ba95030254914027b1"))
	assert.NoError(t, err)
	deps[core.DependencyCommit] = commit
	deps[core.DependencyIsMerge] = false

	lsc := &items.LinesStatsCalculator{}
	result, err = lsc.Consume(deps)
	// Stop here on failure: result is nil then and the type assertion below would panic.
	if !assert.NoError(t, err) {
		return
	}
	stats := result[items.DependencyLineStats].(map[object.ChangeEntry]items.LineStats)
	assert.Len(t, stats, 3)
	nameMap := map[string]items.LineStats{}
	for ch, val := range stats {
		nameMap[ch.Name] = val
	}
	// assert.Equal takes (t, expected, actual).
	assert.Equal(t, items.LineStats{
		Added:   628,
		Removed: 9,
		Changed: 67,
	}, nameMap["analyser2.go"])
	assert.Equal(t, items.LineStats{
		Added:   0,
		Removed: 12,
		Changed: 0,
	}, nameMap[".travis.yml"])
	assert.Equal(t, items.LineStats{
		Added:   207,
		Removed: 0,
		Changed: 0,
	}, nameMap["cmd/hercules/main.go"])
}
Loading

0 comments on commit 276e055

Please sign in to comment.