Skip to content

Commit

Permalink
Merge pull request #227 from bzz/ci-fix-cgo-tests
Browse files Browse the repository at this point in the history
CI: fix cgo profiles
  • Loading branch information
bzz authored May 8, 2019
2 parents ae43e1a + f3ceaa6 commit 6ccf0b6
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 14 deletions.
18 changes: 9 additions & 9 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
dist: trusty

language: go

go:
- '1.12.x'
- '1.11.x'
Expand All @@ -10,17 +8,13 @@ env:
- GO_VERSION_FOR_JVM='1.11.x'
- CGO_ENABLED=0
- GO111MODULE=on
- ONIGURUMA_VERSION='6.9.1'
matrix:
- ONIGURUMA=0
- ONIGURUMA=1
matrix:
fast_finish: true

addons:
apt:
packages:
- libonig-dev

stages:
- name: test
- name: release
Expand All @@ -32,8 +26,14 @@ stage: test
install:
- >
if [[ "${ONIGURUMA}" -gt 0 ]]; then
export CGO_ENABLED=1;
export GO_TAGS='oniguruma';
export CGO_ENABLED=1
export GO_TAGS='oniguruma'
# install oniguruma manually as trusty has only ancient 5.x
sudo apt-get install -y dpkg # dpkg >= 1.17.5ubuntu5.8 fixes https://bugs.launchpad.net/ubuntu/+source/dpkg/+bug/1730627
wget "http://archive.ubuntu.com/ubuntu/pool/universe/libo/libonig/libonig5_${ONIGURUMA_VERSION}-1_amd64.deb"
sudo dpkg -i "libonig5_${ONIGURUMA_VERSION}-1_amd64.deb"
wget "http://archive.ubuntu.com/ubuntu/pool/universe/libo/libonig/libonig-dev_${ONIGURUMA_VERSION}-1_amd64.deb"
sudo dpkg -i "libonig-dev_${ONIGURUMA_VERSION}-1_amd64.deb"
fi;
script:
- make test-coverage
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/src-d/enry/v2
go 1.12

require (
github.com/src-d/go-oniguruma v1.0.0
github.com/src-d/go-oniguruma v1.1.0
github.com/stretchr/testify v1.3.0
github.com/toqueteos/trie v1.0.0 // indirect
gopkg.in/toqueteos/substring.v1 v1.0.2
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/src-d/go-oniguruma v1.0.0 h1:JDk5PUAjreGsGAKLsoDLNmrsaryjJ5RqT3h+Si6aw/E=
github.com/src-d/go-oniguruma v1.0.0/go.mod h1:chVbff8kcVtmrhxtZ3yBVLLquXbzCS6DrxQaAK/CeqM=
github.com/src-d/go-oniguruma v1.1.0 h1:EG+Nm5n2JqWUaCjtM0NtutPxU7ZN5Tp50GWrrV8bTww=
github.com/src-d/go-oniguruma v1.1.0/go.mod h1:chVbff8kcVtmrhxtZ3yBVLLquXbzCS6DrxQaAK/CeqM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
Expand Down
2 changes: 1 addition & 1 deletion internal/tokenizer/tokenize_c.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

package tokenizer

import "gopkg.in/src-d/enry.v1/internal/tokenizer/flex"
import "github.com/src-d/enry/v2/internal/tokenizer/flex"

// Tokenize returns lexical tokens from content. The tokens returned match what
// the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.
Expand Down
39 changes: 39 additions & 0 deletions internal/tokenizer/tokenize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,45 @@ func TestTokenize(t *testing.T) {
}
}

// TestTokenizerLatin1AsUtf8 runs Tokenize over input that is valid Latin-1
// but not valid UTF-8 (the 0xe5 byte) and checks the resulting token count.
func TestTokenizerLatin1AsUtf8(t *testing.T) {
	input := []byte("th\xe5 filling") // `th� filling`
	t.Logf("%v - %q", input, string(input))

	got := Tokenize(input)
	for idx := range got {
		t.Logf("token %d, %s", idx+1, got[idx])
	}

	require.Equal(t, 3, len(got))
}

// TestRegexpOnInvalidUtf8 checks reRegularToken directly against inputs that
// are invalid UTF-8, multi-byte UTF-8, and non-ASCII text: FindAll must yield
// exactly the expected ASCII tokens for each case.
func TestRegexpOnInvalidUtf8(t *testing.T) {
	cases := []struct {
		text   string
		tokens []string
	}{
		{"th\xe0 filling", []string{"th", "filling"}}, // `th� filling`
		{"th\u0100 filling", []string{"th", "filling"}}, // `thĀ filling`
		{"привет, как дела?", []string{}}, // empty, no ASCII tokens
	}
	re := reRegularToken

	for _, tc := range cases {
		t.Run("", func(t *testing.T) {
			t.Logf("%v - %q", tc, tc.text)

			in := []byte(tc.text)
			found := re.FindAll(in, -1)
			require.Equal(t, len(tc.tokens), len(found))

			stripped := re.ReplaceAll(in, []byte(` `))
			t.Logf("content:%q, tokens:[", stripped)
			for j, tok := range found {
				t.Logf("\t%q,", string(tok))
				require.Equal(t, tc.tokens[j], string(tok))
			}
			t.Logf(" ]\n")
		})
	}
}

func BenchmarkTokenizer_BaselineCopy(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
Expand Down
2 changes: 1 addition & 1 deletion regex/oniguruma.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
type EnryRegexp = *rubex.Regexp

func MustCompile(str string) EnryRegexp {
return rubex.MustCompile(str)
return rubex.MustCompileASCII(str)
}

func QuoteMeta(s string) string {
Expand Down

0 comments on commit 6ccf0b6

Please sign in to comment.