diff --git a/command/exclude.go b/command/exclude.go index dbf9a2505..8bdea1a5b 100644 --- a/command/exclude.go +++ b/command/exclude.go @@ -4,23 +4,19 @@ import ( "path/filepath" "regexp" "strings" -) -func wildCardToRegexp(pattern string) string { - patternRegex := regexp.QuoteMeta(pattern) - patternRegex = strings.Replace(patternRegex, "\\?", ".", -1) - patternRegex = strings.Replace(patternRegex, "\\*", ".*", -1) - patternRegex = "^" + patternRegex + "$" - return patternRegex -} + "github.com/peak/s5cmd/strutil" +) // createExcludesFromWildcard creates regex strings from wildcard. func createExcludesFromWildcard(inputExcludes []string) ([]*regexp.Regexp, error) { var result []*regexp.Regexp for _, input := range inputExcludes { if input != "" { - regexVersion := wildCardToRegexp(input) - regexpCompiled, err := regexp.Compile(regexVersion) + regex := strutil.WildCardToRegexp(input) + regex = strutil.MatchFromStartToEnd(regex) + regex = strutil.AddNewLineFlag(regex) + regexpCompiled, err := regexp.Compile(regex) if err != nil { return nil, err } diff --git a/e2e/ls_test.go b/e2e/ls_test.go index e69987429..42351bb59 100644 --- a/e2e/ls_test.go +++ b/e2e/ls_test.go @@ -144,6 +144,32 @@ func TestListSingleWildcardS3Object(t *testing.T) { }, alignment(true)) } +func TestListWildcardS3ObjectWithNewLineInName(t *testing.T) { + t.Parallel() + + bucket := s3BucketFromTestName(t) + + s3client, s5cmd := setup(t) + + createBucket(t, s3client, bucket) + putFile(t, s3client, bucket, "normal.txt", "this is a file content") + putFile(t, s3client, bucket, "another.txt", "this is another file content") + putFile(t, s3client, bucket, "newli\ne.txt", "this is yet another file content") + putFile(t, s3client, bucket, "nap.txt", "this, too, is a file content") + + cmd := s5cmd("ls", "s3://"+bucket+"/n*.txt") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: suffix("28 nap.txt"), + 1: suffix("32 newli"), + 2: 
equals("e.txt"), + 3: suffix("22 normal.txt"), + }) +} + // ls -s bucket/object func TestListS3ObjectsWithDashS(t *testing.T) { t.Parallel() @@ -399,6 +425,8 @@ func TestListS3ObjectsWithExcludeFilter(t *testing.T) { "a/file.c", "file2.txt", "file2.txt.extension", // this should not be excluded. + "newli\ne", + "newli\ne.txt", } s3client, s5cmd := setup(t) @@ -419,7 +447,9 @@ func TestListS3ObjectsWithExcludeFilter(t *testing.T) { 1: match(`a/try.py`), 2: match(`file.py`), 3: match(`file2.txt.extension`), - }, trimMatch(dateRe), alignment(true)) + 4: match("newli"), + 5: match("e"), + }, trimMatch(dateRe), alignment(false)) } // ls --exclude ".txt" --exclude ".py" s3://bucket diff --git a/e2e/util_test.go b/e2e/util_test.go index 6c74a4521..06d5044c8 100644 --- a/e2e/util_test.go +++ b/e2e/util_test.go @@ -23,6 +23,9 @@ import ( "testing" "time" + "github.com/peak/s5cmd/storage" + "github.com/peak/s5cmd/strutil" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/aws/client" @@ -34,7 +37,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/iancoleman/strcase" "github.com/igungor/gofakes3" - "github.com/peak/s5cmd/storage" "gotest.tools/v3/assert" "gotest.tools/v3/fs" "gotest.tools/v3/icmd" @@ -554,7 +556,7 @@ func replaceMatchWithSpace(input string, match ...string) string { if m == "" { continue } - re := regexp.MustCompile(m) + re := regexp.MustCompile(strutil.AddNewLineFlag(m)) input = re.ReplaceAllString(input, " ") } @@ -735,7 +737,7 @@ func alignment(v bool) func(*assertOpts) { } func trimMatch(match string) func(*assertOpts) { - re := regexp.MustCompile(match) + re := regexp.MustCompile(strutil.AddNewLineFlag(match)) return func(opts *assertOpts) { opts.trimRegexes = append(opts.trimRegexes, re) } @@ -863,7 +865,7 @@ func checkLineAlignments(actual string) error { } func match(expected string) compareFunc { - re := regexp.MustCompile(expected) + re := regexp.MustCompile(strutil.AddNewLineFlag(expected)) 
return func(actual string) error { if re.MatchString(actual) { return nil diff --git a/storage/url/url.go b/storage/url/url.go index 755227fff..4cfc53f2f 100644 --- a/storage/url/url.go +++ b/storage/url/url.go @@ -10,6 +10,8 @@ import ( "regexp" "runtime" "strings" + + "github.com/peak/s5cmd/strutil" ) const ( @@ -249,9 +251,7 @@ func (u *URL) setPrefixAndFilter() error { return nil } - loc := strings.IndexAny(u.Path, globCharacters) - wildOperation := loc > -1 - if !wildOperation { + if loc := strings.IndexAny(u.Path, globCharacters); loc < 0 { u.Delimiter = s3Separator u.Prefix = u.Path } else { @@ -261,12 +261,12 @@ func (u *URL) setPrefixAndFilter() error { filterRegex := matchAllRe if u.filter != "" { - filterRegex = regexp.QuoteMeta(u.filter) - filterRegex = strings.Replace(filterRegex, "\\?", ".", -1) - filterRegex = strings.Replace(filterRegex, "\\*", ".*?", -1) + filterRegex = strutil.WildCardToRegexp(u.filter) } filterRegex = regexp.QuoteMeta(u.Prefix) + filterRegex - r, err := regexp.Compile("^" + filterRegex + "$") + filterRegex = strutil.MatchFromStartToEnd(filterRegex) + filterRegex = strutil.AddNewLineFlag(filterRegex) + r, err := regexp.Compile(filterRegex) if err != nil { return err } diff --git a/storage/url/url_test.go b/storage/url/url_test.go index 4aef7eaa3..fa19f262c 100644 --- a/storage/url/url_test.go +++ b/storage/url/url_test.go @@ -8,6 +8,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "github.com/peak/s5cmd/strutil" ) func TestHasWild(t *testing.T) { @@ -70,7 +71,7 @@ func TestNew(t *testing.T) { Prefix: "key", Delimiter: "/", }, - wantFilterRe: regexp.MustCompile(`^key.*$`).String(), + wantFilterRe: regexp.MustCompile(strutil.AddNewLineFlag(`^key.*$`)).String(), }, { name: "url_with_no_wildcard_end_with_slash", @@ -82,7 +83,7 @@ func TestNew(t *testing.T) { Prefix: "key/", Delimiter: "/", }, - wantFilterRe: regexp.MustCompile(`^key/.*$`).String(), + wantFilterRe: 
regexp.MustCompile(strutil.AddNewLineFlag(`^key/.*$`)).String(), }, { name: "url_with_wildcard", @@ -92,10 +93,10 @@ func TestNew(t *testing.T) { Bucket: "bucket", Path: "key/a/?/test/*", Prefix: "key/a/", - filterRegex: regexp.MustCompile(`^key/a/./test/.*?$`), + filterRegex: regexp.MustCompile(strutil.AddNewLineFlag(`^key/a/./test/.*$`)), Delimiter: "", }, - wantFilterRe: regexp.MustCompile(`^key/a/./test/.*?$`).String(), + wantFilterRe: regexp.MustCompile(strutil.AddNewLineFlag(`^key/a/./test/.*$`)).String(), }, } for _, tc := range tests { @@ -112,7 +113,6 @@ func TestNew(t *testing.T) { if tc.wantFilterRe != "" { if diff := cmp.Diff(tc.wantFilterRe, got.filterRegex.String()); diff != "" { t.Errorf("test case %q: URL.filterRegex mismatch (-want +got):\n%v", tc.name, diff) - } } }) @@ -228,7 +228,7 @@ func TestURLSetPrefixAndFilter(t *testing.T) { Prefix: "a/b_c/", Delimiter: "", filter: "*/de/*/test", - filterRegex: regexp.MustCompile("^a/b_c/.*?/de/.*?/test$"), + filterRegex: regexp.MustCompile(strutil.AddNewLineFlag("^a/b_c/.*/de/.*/test$")), }, }, { @@ -241,7 +241,7 @@ func TestURLSetPrefixAndFilter(t *testing.T) { Prefix: "a/b_c/d/e", Delimiter: "/", filter: "", - filterRegex: regexp.MustCompile("^a/b_c/d/e.*$"), + filterRegex: regexp.MustCompile(strutil.AddNewLineFlag("^a/b_c/d/e.*$")), }, }, } diff --git a/strutil/strutil.go b/strutil/strutil.go index ab424a35c..a0cec37a0 100644 --- a/strutil/strutil.go +++ b/strutil/strutil.go @@ -3,7 +3,9 @@ package strutil import ( "encoding/json" "fmt" + "regexp" "strconv" + "strings" ) var humanDivisors = [...]struct { @@ -40,3 +42,21 @@ func JSON(v interface{}) string { bytes, _ := json.Marshal(v) return string(bytes) } + +// AddNewLineFlag adds a flag that allows . to match new line character "\n". +// It assumes that the pattern does not have any flags. 
+func AddNewLineFlag(pattern string) string { + return "(?s)" + pattern +} + +// WildCardToRegexp converts a wildcarded expression to equivalent regular expression +func WildCardToRegexp(pattern string) string { + patternRegex := regexp.QuoteMeta(pattern) + patternRegex = strings.Replace(patternRegex, "\\?", ".", -1) + return strings.Replace(patternRegex, "\\*", ".*", -1) +} + +// MatchFromStartToEnd enforces that the regex will match the full string +func MatchFromStartToEnd(pattern string) string { + return "^" + pattern + "$" +} diff --git a/command/exclude_test.go b/strutil/strutil_test.go similarity index 67% rename from command/exclude_test.go rename to strutil/strutil_test.go index 84ba15699..2741f89d6 100644 --- a/command/exclude_test.go +++ b/strutil/strutil_test.go @@ -1,8 +1,8 @@ -package command +package strutil import "testing" -func Test_wildCardToRegexp(t *testing.T) { +func Test_WildCardToRegexp(t *testing.T) { t.Parallel() tests := []struct { name string @@ -12,22 +12,22 @@ func Test_wildCardToRegexp(t *testing.T) { { name: "main*", pattern: "main*", - wanted: "^main.*$", + wanted: "main.*", }, { name: "*.txt", pattern: "*.txt", - wanted: "^.*\\.txt$", + wanted: ".*\\.txt", }, { name: "?_main*.txt", pattern: "?_main*.txt", - wanted: "^._main.*\\.txt$", + wanted: "._main.*\\.txt", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := wildCardToRegexp(tt.pattern); got != tt.wanted { + if got := WildCardToRegexp(tt.pattern); got != tt.wanted { t.Errorf("wildCardToRegexp() = %v, want %v", got, tt.wanted) } })