From 464dc7f1561ca59255c1a812745bb54e857095c4 Mon Sep 17 00:00:00 2001 From: Vadim Markovtsev Date: Fri, 10 Jan 2020 12:40:05 +0100 Subject: [PATCH] Add exact signature id matching Fixes #340 Signed-off-by: Vadim Markovtsev --- internal/plumbing/identity/identity.go | 97 ++++++++++++++------- internal/plumbing/identity/identity_test.go | 62 +++++++++---- python/setup.py | 2 +- 3 files changed, 113 insertions(+), 48 deletions(-) diff --git a/internal/plumbing/identity/identity.go b/internal/plumbing/identity/identity.go index 3d82b036..9095352d 100644 --- a/internal/plumbing/identity/identity.go +++ b/internal/plumbing/identity/identity.go @@ -21,6 +21,9 @@ type Detector struct { PeopleDict map[string]int // ReversedPeopleDict maps developer id -> description ReversedPeopleDict []string + // ExactSignatures chooses the matching algorithm: opportunistic email || name + // or exact email && name + ExactSignatures bool l core.Logger } @@ -43,6 +46,10 @@ const ( // ConfigIdentityDetectorPeopleDictPath is the name of the configuration option // (Detector.Configure()) which allows to set the external PeopleDict mapping from a file. ConfigIdentityDetectorPeopleDictPath = "IdentityDetector.PeopleDictPath" + // ConfigIdentityDetectorExactSignatures is the name of the configuration option + // (Detector.Configure()) which changes the matching algorithm to exact signature (name + email) + // correspondence. + ConfigIdentityDetectorExactSignatures = "IdentityDetector.ExactSignatures" // FactIdentityDetectorPeopleCount is the name of the fact which is inserted in // Detector.Configure(). It is equal to the overall number of unique authors // (the length of ReversedPeopleDict). @@ -78,7 +85,13 @@ func (detector *Detector) ListConfigurationOptions() []core.ConfigurationOption Description: "Path to the file with developer -> name|email associations.", Flag: "people-dict", Type: core.PathConfigurationOption, - Default: ""}, + Default: ""}, { + Name: ConfigIdentityDetectorExactSignatures, + Description: "Disable separate name/email matching. This will lead to considerbly more " + + "identities and should not be normally used.", + Flag: "exact-signatures", + Type: core.BoolConfigurationOption, + Default: false}, } return options[:] } @@ -96,6 +109,9 @@ func (detector *Detector) Configure(facts map[string]interface{}) error { if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists { detector.ReversedPeopleDict = val } + if val, exists := facts[ConfigIdentityDetectorExactSignatures].(bool); exists { + detector.ExactSignatures = val + } if detector.PeopleDict == nil || detector.ReversedPeopleDict == nil { peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string) if peopleDictPath != "" { @@ -133,13 +149,19 @@ func (detector *Detector) Initialize(repository *git.Repository) error { // in Provides(). If there was an error, nil is returned. func (detector *Detector) Consume(deps map[string]interface{}) (map[string]interface{}, error) { commit := deps[core.DependencyCommit].(*object.Commit) + var authorID int + var exists bool signature := commit.Author - authorID, exists := detector.PeopleDict[strings.ToLower(signature.Email)] - if !exists { - authorID, exists = detector.PeopleDict[strings.ToLower(signature.Name)] + if !detector.ExactSignatures { + authorID, exists = detector.PeopleDict[strings.ToLower(signature.Email)] if !exists { - authorID = AuthorMissing + authorID, exists = detector.PeopleDict[strings.ToLower(signature.Name)] } + } else { + authorID, exists = detector.PeopleDict[strings.ToLower(signature.String())] + } + if !exists { + authorID = AuthorMissing } return map[string]interface{}{DependencyAuthor: authorID}, nil } @@ -184,7 +206,8 @@ func (detector *Detector) GeneratePeopleDict(commits []*object.Commit) { size := 0 mailmapFile, err := commits[len(commits)-1].File(".mailmap") - if err == nil { + // TODO(vmarkovtsev): properly handle .mailmap if ExactSignatures + if !detector.ExactSignatures && err == nil { mailMapContents, err := mailmapFile.Contents() if err == nil { mailmap := ParseMailmap(mailMapContents) @@ -239,34 +262,48 @@ func (detector *Detector) GeneratePeopleDict(commits []*object.Commit) { } for _, commit := range commits { - email := strings.ToLower(commit.Author.Email) - name := strings.ToLower(commit.Author.Name) - id, exists := dict[email] - if exists { - _, exists := dict[name] - if !exists { - dict[name] = id - names[id] = append(names[id], name) + if !detector.ExactSignatures { + email := strings.ToLower(commit.Author.Email) + name := strings.ToLower(commit.Author.Name) + id, exists := dict[email] + if exists { + _, exists := dict[name] + if !exists { + dict[name] = id + names[id] = append(names[id], name) + } + continue + } + id, exists = dict[name] + if exists { + dict[email] = id + emails[id] = append(emails[id], email) + continue + } + dict[email] = size + dict[name] = size + emails[size] = append(emails[size], email) + names[size] = append(names[size], name) + size++ + } else { // !detector.ExactSignatures + sig := strings.ToLower(commit.Author.String()) + if _, exists := dict[sig]; !exists { + dict[sig] = size + size++ } - continue - } - id, exists = dict[name] - if exists { - dict[email] = id - emails[id] = append(emails[id], email) - continue } - dict[email] = size - dict[name] = size - emails[size] = append(emails[size], email) - names[size] = append(names[size], name) - size++ } reverseDict := make([]string, size) - for _, val := range dict { - sort.Strings(names[val]) - sort.Strings(emails[val]) - reverseDict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|") + if !detector.ExactSignatures { + for _, val := range dict { + sort.Strings(names[val]) + sort.Strings(emails[val]) + reverseDict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|") + } + } else { + for key, val := range dict { + reverseDict[val] = key + } } detector.PeopleDict = dict detector.ReversedPeopleDict = reverseDict diff --git a/internal/plumbing/identity/identity_test.go b/internal/plumbing/identity/identity_test.go index 30b65e04..716447b4 100644 --- a/internal/plumbing/identity/identity_test.go +++ b/internal/plumbing/identity/identity_test.go @@ -39,8 +39,9 @@ func TestIdentityDetectorMeta(t *testing.T) { assert.Equal(t, len(id.Provides()), 1) assert.Equal(t, id.Provides()[0], DependencyAuthor) opts := id.ListConfigurationOptions() - assert.Len(t, opts, 1) + assert.Len(t, opts, 2) assert.Equal(t, opts[0].Name, ConfigIdentityDetectorPeopleDictPath) + assert.Equal(t, opts[1].Name, ConfigIdentityDetectorExactSignatures) logger := core.NewLogger() assert.NoError(t, id.Configure(map[string]interface{}{ core.ConfigLogger: logger, @@ -150,6 +151,28 @@ func TestIdentityDetectorConsume(t *testing.T) { assert.Equal(t, res[DependencyAuthor].(int), AuthorMissing) } +func TestIdentityDetectorConsumeExact(t *testing.T) { + commit, _ := test.Repository.CommitObject(plumbing.NewHash( + "5c0e755dd85ac74584d9988cc361eccf02ce1a48")) + deps := map[string]interface{}{} + deps[core.DependencyCommit] = commit + id := fixtureIdentityDetector() + id.ExactSignatures = true + id.PeopleDict = map[string]int{ + "vadim markovtsev ": 0, + "vadim markovtsev ": 1, + } + res, err := id.Consume(deps) + assert.Nil(t, err) + assert.Equal(t, res[DependencyAuthor].(int), 1) + commit, _ = test.Repository.CommitObject(plumbing.NewHash( + "8a03b5620b1caa72ec9cb847ea88332621e2950a")) + deps[core.DependencyCommit] = commit + res, err = id.Consume(deps) + assert.Nil(t, err) + assert.Equal(t, res[DependencyAuthor].(int), AuthorMissing) +} + func TestIdentityDetectorLoadPeopleDict(t *testing.T) { id := fixtureIdentityDetector() err := id.LoadPeopleDict(path.Join("..", "..", "test_data", "identities")) @@ -175,22 +198,6 @@ func TestIdentityDetectorLoadPeopleDictWrongPath(t *testing.T) { assert.NotNil(t, err) } -/* -// internal compiler error in 1.8 -func TestGeneratePeopleDict(t *testing.T) { - id := fixtureIdentityDetector() - commits := make([]*object.Commit, 0) - iter, err := test.Repository.CommitObjects() - for ; err != io.EOF; commit, err := iter.Next() { - if err != nil { - panic(err) - } - commits = append(commits, commit) - } - id.GeneratePeopleDict(commits) -} -*/ - func TestIdentityDetectorGeneratePeopleDict(t *testing.T) { id := fixtureIdentityDetector() commits := make([]*object.Commit, 0) @@ -244,6 +251,27 @@ func TestIdentityDetectorGeneratePeopleDict(t *testing.T) { assert.NotEqual(t, id.ReversedPeopleDict[len(id.ReversedPeopleDict)-1], AuthorMissingName) } +func TestIdentityDetectorGeneratePeopleDictExact(t *testing.T) { + id := fixtureIdentityDetector() + id.ExactSignatures = true + commits := make([]*object.Commit, 0) + iter, err := test.Repository.CommitObjects() + commit, err := iter.Next() + for ; err != io.EOF; commit, err = iter.Next() { + if err != nil { + panic(err) + } + commits = append(commits, commit) + } + id.GeneratePeopleDict(commits) + ass := assert.New(t) + ass.Equal(len(id.PeopleDict), len(id.ReversedPeopleDict)) + ass.True(len(id.ReversedPeopleDict) >= 24) + ass.Contains(id.PeopleDict, "vadim markovtsev ") + ass.Contains(id.PeopleDict, "vadim markovtsev ") + ass.NotEqual(id.ReversedPeopleDict[len(id.ReversedPeopleDict)-1], AuthorMissingName) +} + func TestIdentityDetectorLoadPeopleDictInvalidPath(t *testing.T) { id := fixtureIdentityDetector() ipath := "/xxxyyyzzzInvalidPath!hehe" diff --git a/python/setup.py b/python/setup.py index 903ea053..67a58064 100644 --- a/python/setup.py +++ b/python/setup.py @@ -22,7 +22,7 @@ description="Python companion for github.com/src-d/hercules to visualize the results.", long_description=long_description, long_description_content_type="text/markdown", - version="10.7.0", + version="10.7.1", license="Apache-2.0", author="source{d}", author_email="machine-learning@sourced.tech",