From 453d5555ff49f3e76ce644cb5484fa5f70b3351c Mon Sep 17 00:00:00 2001 From: Chmouel Boudjnah Date: Tue, 25 Jun 2024 11:07:40 +0200 Subject: [PATCH] feat: add language autodetection This commit introduces a new internal package 'autodetect' that automatically detects the language of the project and applies the appropriate cache patterns. This removes the need for manual pattern specification in most cases. Signed-off-by: Chmouel Boudjnah --- .github/workflows/latest.yaml | 4 +- cmd/cache/fetch.go | 6 +- cmd/cache/upload.go | 6 +- internal/autodetect/patterns.go | 85 ++++++++++++++++++++++++++++ internal/autodetect/patterns_test.go | 50 ++++++++++++++++ internal/flags/common.go | 37 ++++++++++++ 6 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 internal/autodetect/patterns.go create mode 100644 internal/autodetect/patterns_test.go create mode 100644 internal/flags/common.go diff --git a/.github/workflows/latest.yaml b/.github/workflows/latest.yaml index b4f4ddb66..67dbb8637 100644 --- a/.github/workflows/latest.yaml +++ b/.github/workflows/latest.yaml @@ -66,12 +66,14 @@ jobs: kubectl apply -f tests/ -f examples/ tkn task start cache-fetch-go -p gitURL=https://github.com/vdemeester/go-helloworld-app \ -p gitRevision=main \ - -p eachePatterns="**.go,**go.sum" \ + -p cachePatterns="**.go,**go.sum" \ -p cacheURIBase=oci://${REGISTRY}/cache/go \ -w name=source,emptyDir= -w name=gocache,emptyDir= \ -w name=gomodcache,emptyDir= --showlog tkn task start cache-upload-go -p gitURL=https://github.com/vdemeester/go-helloworld-app -p gitRevision=main -p cachePatterns="**.go,**go.sum" -p cacheURIBase=oci://${REGISTRY}/cache/go -w name=source,emptyDir= -w name=gocache,emptyDir= -w name=gomodcache,emptyDir= --showlog tkn task start cache-fetch-go -p gitURL=https://github.com/vdemeester/go-helloworld-app -p gitRevision=main -p cachePatterns="**.go,**go.sum" -p cacheURIBase=oci://${REGISTRY}/cache/go -w name=source,emptyDir= -w name=gocache,emptyDir= -w 
name=gomodcache,emptyDir= --showlog + tkn task start cache-upload-go -p gitURL=https://github.com/vdemeester/go-helloworld-app -p gitRevision=main -p cacheURIBase=oci://${REGISTRY}/cache/go -w name=source,emptyDir= -w name=gocache,emptyDir= -w name=gomodcache,emptyDir= --showlog + tkn task start cache-fetch-go -p gitURL=https://github.com/vdemeester/go-helloworld-app -p gitRevision=main -p cacheURIBase=oci://${REGISTRY}/cache/go -w name=source,emptyDir= -w name=gocache,emptyDir= -w name=gomodcache,emptyDir= --showlog tkn taskrun list kubectl get taskrun -o yaml publish: diff --git a/cmd/cache/fetch.go b/cmd/cache/fetch.go index 7fa542369..3fde063fc 100644 --- a/cmd/cache/fetch.go +++ b/cmd/cache/fetch.go @@ -8,6 +8,7 @@ import ( "github.com/moby/patternmatcher" "github.com/openshift-pipelines/tekton-caches/internal/fetch" + "github.com/openshift-pipelines/tekton-caches/internal/flags" "github.com/openshift-pipelines/tekton-caches/internal/hash" "github.com/spf13/cobra" ) @@ -15,7 +16,6 @@ import ( const ( workingdirFlag = "workingdir" filesFlag = "hashfiles" - patternsFlag = "pattern" sourceFlag = "source" folderFlag = "folder" insecureFlag = "insecure" @@ -37,7 +37,7 @@ func fetchCmd() *cobra.Command { if err != nil { return err } - patterns, err := cmd.Flags().GetStringArray(patternsFlag) + patterns, err := flags.Patterns(cmd, workingdir) if err != nil { return err } @@ -70,7 +70,7 @@ func fetchCmd() *cobra.Command { }, } - cmd.Flags().StringArray(patternsFlag, []string{}, "Files pattern to compute the hash from") + cmd.Flags().StringArray(flags.PatternsFlag, []string{}, "Files pattern to compute the hash from") cmd.Flags().String(sourceFlag, "", "Cache source reference") cmd.Flags().String(folderFlag, "", "Folder where to extract the content of the cache if it exists") cmd.Flags().String(workingdirFlag, ".", "Working dir from where the files patterns needs to be taken") diff --git a/cmd/cache/upload.go b/cmd/cache/upload.go index c4afafcd8..6928e6b87 100644 --- 
a/cmd/cache/upload.go +++ b/cmd/cache/upload.go @@ -5,6 +5,7 @@ import ( "os" "github.com/moby/patternmatcher" + "github.com/openshift-pipelines/tekton-caches/internal/flags" "github.com/openshift-pipelines/tekton-caches/internal/hash" "github.com/openshift-pipelines/tekton-caches/internal/upload" "github.com/spf13/cobra" @@ -30,7 +31,8 @@ func uploadCmd() *cobra.Command { if err != nil { return err } - patterns, err := cmd.Flags().GetStringArray(patternsFlag) + + patterns, err := flags.Patterns(cmd, workingdir) if err != nil { return err } @@ -61,7 +63,7 @@ func uploadCmd() *cobra.Command { return upload.Upload(cmd.Context(), hashStr, target, folder, insecure) }, } - cmd.Flags().StringArray(patternsFlag, []string{}, "Files pattern to compute the hash from") + cmd.Flags().StringArray(flags.PatternsFlag, []string{}, "Files pattern to compute the hash from") cmd.Flags().String(targetFlag, "", "Cache target reference") cmd.Flags().String(folderFlag, "", "Folder where to extract the content of the cache if it exists") cmd.Flags().String(workingdirFlag, ".", "Working dir from where the files patterns needs to be taken") diff --git a/internal/autodetect/patterns.go b/internal/autodetect/patterns.go new file mode 100644 index 000000000..3e17d460d --- /dev/null +++ b/internal/autodetect/patterns.go @@ -0,0 +1,85 @@ +package autodetect + +import ( + "os" + "path/filepath" +) + +type ( + Pattern []string + LanguePattern struct { + Language string + Patterns []Pattern + } +) + +var languagePatterns = []LanguePattern{ + { + Language: "go", + Patterns: []Pattern{ + {"go.mod", "go.sum"}, + }, + }, + { + Language: "nodejs", + Patterns: []Pattern{ + {"package.json", "package-lock.json"}, + {"yarn.lock"}, + }, + }, + { + Language: "java", + Patterns: []Pattern{ + {"pom.xml"}, + {"build.gradle"}, + }, + }, + { + Language: "python", + Patterns: []Pattern{ + {"setup.py", "requirements.txt"}, + {"Pipfile"}, + {"poetry.lock"}, + }, + }, + { + Language: "ruby", + Patterns: []Pattern{ + 
{"Gemfile", "Gemfile.lock"}, + }, + }, + { + Language: "php", + Patterns: []Pattern{ + {"composer.json", "composer.lock"}, + }, + }, + { + Language: "rust", + Patterns: []Pattern{ + {"Cargo.toml", "Cargo.lock"}, + }, + }, +} + +func PatternsByLanguage(workingdir string) map[string][]string { + detectedPatterns := make(map[string][]string) + + for _, languagePattern := range languagePatterns { + for _, pattern := range languagePattern.Patterns { + allFilesExist := true + for _, file := range pattern { + if _, err := os.Stat(filepath.Join(workingdir, file)); err != nil { + allFilesExist = false + break + } + } + if allFilesExist { + detectedPatterns[languagePattern.Language] = pattern + break + } + } + } + + return detectedPatterns +} diff --git a/internal/autodetect/patterns_test.go b/internal/autodetect/patterns_test.go new file mode 100644 index 000000000..a7978a307 --- /dev/null +++ b/internal/autodetect/patterns_test.go @@ -0,0 +1,50 @@ +package autodetect + +import ( + "os" + "path/filepath" + "testing" + + "gotest.tools/v3/assert" + "gotest.tools/v3/env" + "gotest.tools/v3/fs" +) + +func TestPatternsByLanguage(t *testing.T) { + tests := []struct { + name string + language string + files []string + }{ + {name: "golang", language: "go", files: []string{"go.mod", "go.sum"}}, + {name: "nodejs-npm", language: "nodejs", files: []string{"package.json", "package-lock.json"}}, + {name: "nodejs-yarn", language: "nodejs", files: []string{"yarn.lock"}}, + {name: "java-maven", language: "java", files: []string{"pom.xml"}}, + {name: "java-gradle", language: "java", files: []string{"build.gradle"}}, + {name: "python-setup", language: "python", files: []string{"setup.py", "requirements.txt"}}, + {name: "python-pipfile", language: "python", files: []string{"Pipfile"}}, + {name: "python-poetry", language: "python", files: []string{"poetry.lock"}}, + {name: "ruby", language: "ruby", files: []string{"Gemfile", "Gemfile.lock"}}, + {name: "php", language: "php", files:
[]string{"composer.json", "composer.lock"}}, + {name: "rust", language: "rust", files: []string{"Cargo.toml", "Cargo.lock"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpdir := fs.NewDir(t, t.Name()) + defer tmpdir.Remove() + + defer env.ChangeWorkingDir(t, tmpdir.Path())() + + for _, file := range tt.files { + err := os.WriteFile(filepath.Join(tmpdir.Path(), file), []byte("random content"), 0o644) + assert.NilError(t, err) + } + + patterns := PatternsByLanguage(tmpdir.Path()) + assert.DeepEqual(t, patterns, map[string][]string{ + tt.language: tt.files, + }) + }) + } +} diff --git a/internal/flags/common.go b/internal/flags/common.go new file mode 100644 index 000000000..bc1665542 --- /dev/null +++ b/internal/flags/common.go @@ -0,0 +1,37 @@ +package flags + +import ( + "fmt" + "os" + + "github.com/openshift-pipelines/tekton-caches/internal/autodetect" + "github.com/spf13/cobra" +) + +const PatternsFlag = "pattern" + +func Patterns(cmd *cobra.Command, workingdir string) ([]string, error) { + patterns, err := cmd.Flags().GetStringArray(PatternsFlag) + if err != nil { + return []string{}, err + } + if len(patterns) == 0 { + // NOTE(chmouel): when several languages are detected we use a single cache target, it + // just makes things simpler. + // On a very large monorepo this might be a problem. + languages := autodetect.PatternsByLanguage(workingdir) + if len(languages) == 0 { + return []string{}, fmt.Errorf("didn't detect any language, please specify the patterns with --%s flag", PatternsFlag) + } + for language, files := range languages { + fmt.Fprintf(os.Stderr, "Detected project language %s\n", language) + for _, file := range files { + // NOTE(chmouel): we are using a glob pattern to match the top dir, not the subdirs, + // but that's fine since most of the time most people don't use + // composed dependency workspaces (except the rustaceans) + patterns = append(patterns, fmt.Sprintf("*%s", file)) + } + } + } + return patterns, nil +}