Skip to content

Commit

Permalink
feat: add language autodetection
Browse files Browse the repository at this point in the history
This commit introduces a new internal package 'autodetect' that
automatically detects the language of the project and applies the
appropriate cache patterns. This removes the need for manual pattern
specification in most cases.

Signed-off-by: Chmouel Boudjnah <[email protected]>
  • Loading branch information
chmouel committed Jun 25, 2024
1 parent d56388b commit 453d555
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 6 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,14 @@ jobs:
kubectl apply -f tests/ -f examples/
tkn task start cache-fetch-go -p gitURL=https://github.com/vdemeester/go-helloworld-app \
-p gitRevision=main \
-p eachePatterns="**.go,**go.sum" \
-p cachePatterns="**.go,**go.sum" \
-p cacheURIBase=oci://${REGISTRY}/cache/go \
-w name=source,emptyDir= -w name=gocache,emptyDir= \
-w name=gomodcache,emptyDir= --showlog
tkn task start cache-upload-go -p gitURL=https://github.com/vdemeester/go-helloworld-app -p gitRevision=main -p cachePatterns="**.go,**go.sum" -p cacheURIBase=oci://${REGISTRY}/cache/go -w name=source,emptyDir= -w name=gocache,emptyDir= -w name=gomodcache,emptyDir= --showlog
tkn task start cache-fetch-go -p gitURL=https://github.com/vdemeester/go-helloworld-app -p gitRevision=main -p cachePatterns="**.go,**go.sum" -p cacheURIBase=oci://${REGISTRY}/cache/go -w name=source,emptyDir= -w name=gocache,emptyDir= -w name=gomodcache,emptyDir= --showlog
tkn task start cache-upload-go -p gitURL=https://github.com/vdemeester/go-helloworld-app -p gitRevision=main -p cacheURIBase=oci://${REGISTRY}/cache/go -w name=source,emptyDir= -w name=gocache,emptyDir= -w name=gomodcache,emptyDir= --showlog
tkn task start cache-fetch-go -p gitURL=https://github.com/vdemeester/go-helloworld-app -p gitRevision=main -p cacheURIBase=oci://${REGISTRY}/cache/go -w name=source,emptyDir= -w name=gocache,emptyDir= -w name=gomodcache,emptyDir= --showlog
tkn taskrun list
kubectl get taskrun -o yaml
publish:
Expand Down
6 changes: 3 additions & 3 deletions cmd/cache/fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ import (

"github.com/moby/patternmatcher"
"github.com/openshift-pipelines/tekton-caches/internal/fetch"
"github.com/openshift-pipelines/tekton-caches/internal/flags"
"github.com/openshift-pipelines/tekton-caches/internal/hash"
"github.com/spf13/cobra"
)

const (
workingdirFlag = "workingdir"
filesFlag = "hashfiles"
patternsFlag = "pattern"
sourceFlag = "source"
folderFlag = "folder"
insecureFlag = "insecure"
Expand All @@ -37,7 +37,7 @@ func fetchCmd() *cobra.Command {
if err != nil {
return err
}
patterns, err := cmd.Flags().GetStringArray(patternsFlag)
patterns, err := flags.Patterns(cmd, workingdir)
if err != nil {
return err
}
Expand Down Expand Up @@ -70,7 +70,7 @@ func fetchCmd() *cobra.Command {
},
}

cmd.Flags().StringArray(patternsFlag, []string{}, "Files pattern to compute the hash from")
cmd.Flags().StringArray(flags.PatternsFlag, []string{}, "Files pattern to compute the hash from")
cmd.Flags().String(sourceFlag, "", "Cache source reference")
cmd.Flags().String(folderFlag, "", "Folder where to extract the content of the cache if it exists")
cmd.Flags().String(workingdirFlag, ".", "Working dir from where the files patterns needs to be taken")
Expand Down
6 changes: 4 additions & 2 deletions cmd/cache/upload.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"os"

"github.com/moby/patternmatcher"
"github.com/openshift-pipelines/tekton-caches/internal/flags"
"github.com/openshift-pipelines/tekton-caches/internal/hash"
"github.com/openshift-pipelines/tekton-caches/internal/upload"
"github.com/spf13/cobra"
Expand All @@ -30,7 +31,8 @@ func uploadCmd() *cobra.Command {
if err != nil {
return err
}
patterns, err := cmd.Flags().GetStringArray(patternsFlag)

patterns, err := flags.Patterns(cmd, workingdir)
if err != nil {
return err
}
Expand Down Expand Up @@ -61,7 +63,7 @@ func uploadCmd() *cobra.Command {
return upload.Upload(cmd.Context(), hashStr, target, folder, insecure)
},
}
cmd.Flags().StringArray(patternsFlag, []string{}, "Files pattern to compute the hash from")
cmd.Flags().StringArray(flags.PatternsFlag, []string{}, "Files pattern to compute the hash from")
cmd.Flags().String(targetFlag, "", "Cache target reference")
cmd.Flags().String(folderFlag, "", "Folder where to extract the content of the cache if it exists")
cmd.Flags().String(workingdirFlag, ".", "Working dir from where the files patterns needs to be taken")
Expand Down
85 changes: 85 additions & 0 deletions internal/autodetect/patterns.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package autodetect

import (
"os"
"path/filepath"
)

// Pattern is one group of marker file names. PatternsByLanguage treats a
// group as matched only when every file in it is found in the working
// directory.
type Pattern []string

// LanguePattern ties a language name to the marker file groups that can
// identify it.
type LanguePattern struct {
	// Language is the name reported for the language, e.g. "go".
	Language string
	// Patterns holds the alternative marker groups for Language; they are
	// tried in order and the first fully matching group is used.
	Patterns []Pattern
}

// languagePatterns is the detection table used by PatternsByLanguage: for
// each language, the alternative groups of marker files that identify it,
// listed in the order in which they are tried.
var languagePatterns = []LanguePattern{
	{Language: "go", Patterns: []Pattern{{"go.mod", "go.sum"}}},
	{Language: "nodejs", Patterns: []Pattern{
		{"package.json", "package-lock.json"}, // npm
		{"yarn.lock"},                         // yarn
	}},
	{Language: "java", Patterns: []Pattern{
		{"pom.xml"},      // maven
		{"build.gradle"}, // gradle
	}},
	{Language: "python", Patterns: []Pattern{
		{"setup.py", "requirements.txt"},
		{"Pipfile"},
		{"poetry.lock"},
	}},
	{Language: "ruby", Patterns: []Pattern{{"Gemfile", "Gemfile.lock"}}},
	{Language: "php", Patterns: []Pattern{{"composer.json", "composer.lock"}}},
	{Language: "rust", Patterns: []Pattern{{"Cargo.toml", "Cargo.lock"}}},
}

// PatternsByLanguage inspects workingdir and returns, for every language of
// the languagePatterns table detected there, the marker files that
// identified it.
//
// A language is detected when all files of one of its marker groups can be
// stat'ed directly inside workingdir (sub-directories are not searched); the
// first matching group of a language wins. The returned map is keyed by
// language name and is empty, never nil, when nothing is detected. Each
// value is a copy of the table entry, so callers may modify it freely.
func PatternsByLanguage(workingdir string) map[string][]string {
	detected := make(map[string][]string)

	for _, lp := range languagePatterns {
		for _, group := range lp.Patterns {
			if !allPresent(workingdir, group) {
				continue
			}
			// Copy the group so a caller mutating the result cannot alter
			// the package-level detection table.
			detected[lp.Language] = append([]string(nil), group...)
			break
		}
	}

	return detected
}

// allPresent reports whether every name in files can be stat'ed inside dir.
func allPresent(dir string, files []string) bool {
	for _, name := range files {
		// Any stat failure counts as "missing", not only a not-exist error:
		// on Unix-like systems a dir that is not a directory fails with
		// ENOTDIR, which os.IsNotExist does not recognise, and that used to
		// make every language look detected.
		if _, err := os.Stat(filepath.Join(dir, name)); err != nil {
			return false
		}
	}
	return true
}
50 changes: 50 additions & 0 deletions internal/autodetect/patterns_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package autodetect

import (
"os"
"path/filepath"
"testing"

"gotest.tools/v3/assert"
"gotest.tools/v3/env"
"gotest.tools/v3/fs"
)

// TestWriteFile checks that PatternsByLanguage reports exactly the language
// whose marker files were written into a scratch directory, together with
// those marker files.
func TestWriteFile(t *testing.T) {
	cases := []struct {
		name     string
		language string
		files    []string
	}{
		{name: "golang", language: "go", files: []string{"go.mod", "go.sum"}},
		{name: "nodejs-npm", language: "nodejs", files: []string{"package.json", "package-lock.json"}},
		{name: "nodejs-yarn", language: "nodejs", files: []string{"yarn.lock"}},
		{name: "java-maven", language: "java", files: []string{"pom.xml"}},
		{name: "java-gradle", language: "java", files: []string{"build.gradle"}},
		{name: "python-setup", language: "python", files: []string{"setup.py", "requirements.txt"}},
		{name: "python-pipfile", language: "python", files: []string{"Pipfile"}},
		{name: "python-poetry", language: "python", files: []string{"poetry.lock"}},
		{name: "ruby", language: "ruby", files: []string{"Gemfile", "Gemfile.lock"}},
		{name: "php", language: "php", files: []string{"composer.json", "composer.lock"}},
		{name: "rust", language: "rust", files: []string{"Cargo.toml", "Cargo.lock"}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Each sub-test gets its own scratch directory, removed on exit.
			scratch := fs.NewDir(t, t.Name())
			defer scratch.Remove()

			root := scratch.Path()
			defer env.ChangeWorkingDir(t, root)()

			// Create the marker files of this case; their content is irrelevant.
			for _, name := range tc.files {
				target := filepath.Join(root, name)
				assert.NilError(t, os.WriteFile(target, []byte("random content"), 0o644))
			}

			want := map[string][]string{tc.language: tc.files}
			assert.DeepEqual(t, PatternsByLanguage(root), want)
		})
	}
}
37 changes: 37 additions & 0 deletions internal/flags/common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package flags

import (
	"fmt"
	"os"
	"sort"

	"github.com/openshift-pipelines/tekton-caches/internal/autodetect"
	"github.com/spf13/cobra"
)

// PatternsFlag is the name of the command-line flag through which file
// patterns are given explicitly. It is a constant so the flag name cannot be
// reassigned at run time.
const PatternsFlag = "pattern"

// Patterns returns the file patterns from which the cache hash is computed.
//
// Patterns given explicitly through the PatternsFlag flag of cmd are returned
// unchanged. When none are given, the languages found in workingdir by
// autodetect.PatternsByLanguage are reported on stderr and each of their
// marker files is turned into a "*<file>" glob. Languages are processed in
// alphabetical order so the output and the returned list are the same on
// every run.
//
// An error is returned when the flag cannot be read or when no language is
// detected.
func Patterns(cmd *cobra.Command, workingdir string) ([]string, error) {
	patterns, err := cmd.Flags().GetStringArray(PatternsFlag)
	if err != nil {
		return []string{}, err
	}
	if len(patterns) > 0 {
		return patterns, nil
	}

	// NOTE(chmouel): with multiple languages we use a single cache target, it
	// just makes things simpler; on very large monorepos this might be a
	// problem.
	detected := autodetect.PatternsByLanguage(workingdir)
	if len(detected) == 0 {
		return []string{}, fmt.Errorf("didn't detect any language, please specify the patterns with --%s flag", PatternsFlag)
	}

	// Map iteration order is unspecified; sort the language names so the
	// messages and the pattern list come out in a stable order.
	languages := make([]string, 0, len(detected))
	for language := range detected {
		languages = append(languages, language)
	}
	sort.Strings(languages)

	for _, language := range languages {
		fmt.Fprintf(os.Stderr, "Detected project language %s\n", language)
		for _, file := range detected[language] {
			// NOTE(chmouel): the glob matches the top dir, not the subdirs,
			// but that's fine since most of the time people don't use
			// composed dependency workspaces (except the rustaceans).
			patterns = append(patterns, "*"+file)
		}
	}
	return patterns, nil
}

0 comments on commit 453d555

Please sign in to comment.