Skip to content

Commit

Permalink
[chore][pkg/stanza/fileconsumer] Move finder into internal package (o…
Browse files Browse the repository at this point in the history
…pen-telemetry#24772)

Follows open-telemetry#24688

This is an incremental step towards hardening `fileconsumer` package.
This isolates the "finding files" logic into its own package. The
recently added filtering of files based on additional criteria is left
in place for now. It also strengthens the test cases by using actual
input, which is often relative, instead of converting everything to
absolute paths.
  • Loading branch information
djaglowski authored Aug 2, 2023
1 parent 351b5c3 commit be93b0f
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 145 deletions.
24 changes: 3 additions & 21 deletions pkg/stanza/fileconsumer/finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ package fileconsumer // import "github.com/open-telemetry/opentelemetry-collecto
import (
"regexp"

"github.com/bmatcuk/doublestar/v4"
"go.uber.org/multierr"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/finder"
)

type MatchingCriteria struct {
Expand Down Expand Up @@ -47,26 +48,7 @@ type sortRuleImpl struct {

// findFiles gets a list of paths given an array of glob patterns to include and exclude
func (f MatchingCriteria) findFiles() ([]string, error) {
all := make([]string, 0, len(f.Include))
for _, include := range f.Include {
matches, _ := doublestar.FilepathGlob(include, doublestar.WithFilesOnly()) // compile error checked in build
INCLUDE:
for _, match := range matches {
for _, exclude := range f.Exclude {
if itMatches, _ := doublestar.PathMatch(exclude, match); itMatches {
continue INCLUDE
}
}

for _, existing := range all {
if existing == match {
continue INCLUDE
}
}

all = append(all, match)
}
}
all := finder.FindFiles(f.Include, f.Exclude)

if len(all) == 0 || len(f.OrderingCriteria.SortBy) == 0 {
return all, nil
Expand Down
131 changes: 7 additions & 124 deletions pkg/stanza/fileconsumer/finder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,109 +21,6 @@ func TestFinder(t *testing.T) {
filterSortRule OrderingCriteria
expected []string
}{
{
name: "IncludeOne",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"a1.log"},
exclude: []string{},
expected: []string{"a1.log"},
},
{
name: "IncludeNone",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"c*.log"},
exclude: []string{},
expected: []string{},
},
{
name: "IncludeAll",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*"},
exclude: []string{},
expected: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
},
{
name: "IncludeLogs",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*.log"},
exclude: []string{},
expected: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
},
{
name: "IncludeA",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"a*.log"},
exclude: []string{},
expected: []string{"a1.log", "a2.log"},
},
{
name: "Include2s",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*2.log"},
exclude: []string{},
expected: []string{"a2.log", "b2.log"},
},
{
name: "Exclude",
files: []string{"include.log", "exclude.log"},
include: []string{"*"},
exclude: []string{"exclude.log"},
expected: []string{"include.log"},
},
{
name: "ExcludeMany",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*"},
exclude: []string{"a*.log", "*2.log"},
expected: []string{"b1.log"},
},
{
name: "ExcludeDuplicates",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*1*", "a*"},
exclude: []string{"a*.log", "*2.log"},
expected: []string{"b1.log"},
},
{
name: "IncludeMultipleDirectories",
files: []string{filepath.Join("a", "1.log"), filepath.Join("a", "2.log"), filepath.Join("b", "1.log"), filepath.Join("b", "2.log")},
include: []string{filepath.Join("a", "*.log"), filepath.Join("b", "*.log")},
exclude: []string{},
expected: []string{filepath.Join("a", "1.log"), filepath.Join("a", "2.log"), filepath.Join("b", "1.log"), filepath.Join("b", "2.log")},
},
{
name: "IncludeMultipleDirectoriesVaryingDepth",
files: []string{"1.log", filepath.Join("a", "1.log"), filepath.Join("a", "b", "1.log"), filepath.Join("c", "1.log")},
include: []string{"*.log", filepath.Join("a", "*.log"), filepath.Join("a", "b", "*.log"), filepath.Join("c", "*.log")},
exclude: []string{},
expected: []string{"1.log", filepath.Join("a", "1.log"), filepath.Join("a", "b", "1.log"), filepath.Join("c", "1.log")},
},
{
name: "DoubleStarSameDepth",
files: []string{filepath.Join("a", "1.log"), filepath.Join("b", "1.log"), filepath.Join("c", "1.log")},
include: []string{filepath.Join("**", "*.log")},
exclude: []string{},
expected: []string{filepath.Join("a", "1.log"), filepath.Join("b", "1.log"), filepath.Join("c", "1.log")},
},
{
name: "DoubleStarVaryingDepth",
files: []string{"1.log", filepath.Join("a", "1.log"), filepath.Join("a", "b", "1.log"), filepath.Join("c", "1.log")},
include: []string{filepath.Join("**", "*.log")},
exclude: []string{},
expected: []string{"1.log", filepath.Join("a", "1.log"), filepath.Join("a", "b", "1.log"), filepath.Join("c", "1.log")},
},
{
name: "SingleLevelFilesOnly",
files: []string{"a1.log", "a2.txt", "b/b1.log", "b/b2.txt"},
include: []string{"*"},
expected: []string{"a1.log", "a2.txt"},
},
{
name: "MultiLevelFilesOnly",
files: []string{"a1.log", "a2.txt", "b/b1.log", "b/b2.txt", "b/c/c1.csv"},
include: []string{filepath.Join("**", "*")},
expected: []string{"a1.log", "a2.txt", filepath.Join("b", "b1.log"), filepath.Join("b", "b2.txt"), filepath.Join("b", "c", "c1.csv")},
},
{
name: "Timestamp Sorting",
files: []string{"err.2023020611.log", "err.2023020612.log", "err.2023020610.log", "err.2023020609.log"},
Expand Down Expand Up @@ -534,33 +431,19 @@ func TestFinder(t *testing.T) {

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
tempDir := t.TempDir()
files := absPath(tempDir, tc.files)
include := absPath(tempDir, tc.include)
exclude := absPath(tempDir, tc.exclude)
expected := absPath(tempDir, tc.expected)

for _, f := range files {
require.NoError(t, os.Chdir(t.TempDir()))
for _, f := range tc.files {
require.NoError(t, os.MkdirAll(filepath.Dir(f), 0700))
require.NoError(t, os.WriteFile(f, []byte(filepath.Base(f)), 0000))
}

finder := MatchingCriteria{
Include: include,
Exclude: exclude,
matcher := MatchingCriteria{
Include: tc.include,
Exclude: tc.exclude,
OrderingCriteria: tc.filterSortRule,
}
files, err := finder.findFiles()
files, err := matcher.findFiles()
require.NoError(t, err)
require.Equal(t, expected, files)
require.Equal(t, tc.expected, files)
})
}
}

func absPath(tempDir string, files []string) []string {
absFiles := make([]string, 0, len(files))
for _, f := range files {
absFiles = append(absFiles, filepath.Join(tempDir, f))
}
return absFiles
}
34 changes: 34 additions & 0 deletions pkg/stanza/fileconsumer/internal/finder/finder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package finder // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/finder"

import (
"github.com/bmatcuk/doublestar/v4"
)

// FindFiles gets a list of paths given an array of glob patterns to include and exclude
func FindFiles(includes []string, excludes []string) []string {
all := make([]string, 0, len(includes))
for _, include := range includes {
matches, _ := doublestar.FilepathGlob(include, doublestar.WithFilesOnly()) // compile error checked in build
INCLUDE:
for _, match := range matches {
for _, exclude := range excludes {
if itMatches, _ := doublestar.PathMatch(exclude, match); itMatches {
continue INCLUDE
}
}

for _, existing := range all {
if existing == match {
continue INCLUDE
}
}

all = append(all, match)
}
}

return all
}
137 changes: 137 additions & 0 deletions pkg/stanza/fileconsumer/internal/finder/finder_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package finder

import (
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/require"
)

func TestFinder(t *testing.T) {
cases := []struct {
name string
files []string
include []string
exclude []string
expected []string
}{
{
name: "IncludeOne",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"a1.log"},
exclude: []string{},
expected: []string{"a1.log"},
},
{
name: "IncludeNone",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"c*.log"},
exclude: []string{},
expected: []string{},
},
{
name: "IncludeAll",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*"},
exclude: []string{},
expected: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
},
{
name: "IncludeLogs",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*.log"},
exclude: []string{},
expected: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
},
{
name: "IncludeA",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"a*.log"},
exclude: []string{},
expected: []string{"a1.log", "a2.log"},
},
{
name: "Include2s",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*2.log"},
exclude: []string{},
expected: []string{"a2.log", "b2.log"},
},
{
name: "Exclude",
files: []string{"include.log", "exclude.log"},
include: []string{"*"},
exclude: []string{"exclude.log"},
expected: []string{"include.log"},
},
{
name: "ExcludeMany",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*"},
exclude: []string{"a*.log", "*2.log"},
expected: []string{"b1.log"},
},
{
name: "ExcludeDuplicates",
files: []string{"a1.log", "a2.log", "b1.log", "b2.log"},
include: []string{"*1*", "a*", "*.log"},
exclude: []string{"a*.log", "*2.log"},
expected: []string{"b1.log"},
},
{
name: "IncludeMultipleDirectories",
files: []string{filepath.Join("a", "1.log"), filepath.Join("a", "2.log"), filepath.Join("b", "1.log"), filepath.Join("b", "2.log")},
include: []string{filepath.Join("a", "*.log"), filepath.Join("b", "*.log")},
exclude: []string{},
expected: []string{filepath.Join("a", "1.log"), filepath.Join("a", "2.log"), filepath.Join("b", "1.log"), filepath.Join("b", "2.log")},
},
{
name: "IncludeMultipleDirectoriesVaryingDepth",
files: []string{"1.log", filepath.Join("a", "1.log"), filepath.Join("a", "b", "1.log"), filepath.Join("c", "1.log")},
include: []string{"*.log", filepath.Join("a", "*.log"), filepath.Join("a", "b", "*.log"), filepath.Join("c", "*.log")},
exclude: []string{},
expected: []string{"1.log", filepath.Join("a", "1.log"), filepath.Join("a", "b", "1.log"), filepath.Join("c", "1.log")},
},
{
name: "DoubleStarSameDepth",
files: []string{filepath.Join("a", "1.log"), filepath.Join("b", "1.log"), filepath.Join("c", "1.log")},
include: []string{filepath.Join("**", "*.log")},
exclude: []string{},
expected: []string{filepath.Join("a", "1.log"), filepath.Join("b", "1.log"), filepath.Join("c", "1.log")},
},
{
name: "DoubleStarVaryingDepth",
files: []string{"1.log", filepath.Join("a", "1.log"), filepath.Join("a", "b", "1.log"), filepath.Join("c", "1.log")},
include: []string{filepath.Join("**", "*.log")},
exclude: []string{},
expected: []string{"1.log", filepath.Join("a", "1.log"), filepath.Join("a", "b", "1.log"), filepath.Join("c", "1.log")},
},
{
name: "SingleLevelFilesOnly",
files: []string{"a1.log", "a2.txt", "b/b1.log", "b/b2.txt"},
include: []string{"*"},
expected: []string{"a1.log", "a2.txt"},
},
{
name: "MultiLevelFilesOnly",
files: []string{"a1.log", "a2.txt", "b/b1.log", "b/b2.txt", "b/c/c1.csv"},
include: []string{filepath.Join("**", "*")},
expected: []string{"a1.log", "a2.txt", filepath.Join("b", "b1.log"), filepath.Join("b", "b2.txt"), filepath.Join("b", "c", "c1.csv")},
},
}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
require.NoError(t, os.Chdir(t.TempDir()))
for _, f := range tc.files {
require.NoError(t, os.MkdirAll(filepath.Dir(f), 0700))
require.NoError(t, os.WriteFile(f, []byte(filepath.Base(f)), 0000))
}
require.Equal(t, tc.expected, FindFiles(tc.include, tc.exclude))
})
}
}

0 comments on commit be93b0f

Please sign in to comment.