Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement the executor #30

Merged
merged 1 commit into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions app/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ require (
github.com/pkg/errors v0.9.1
github.com/spf13/cobra v1.8.0
github.com/spf13/viper v1.18.2
github.com/timtadh/lexmachine v0.2.3
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.1
go.uber.org/zap v1.27.0
google.golang.org/grpc v1.62.1
Expand Down Expand Up @@ -111,6 +112,7 @@ require (
github.com/spf13/cast v1.6.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/timtadh/data-structures v0.6.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/vbatts/tar-split v0.11.3 // indirect
Expand Down
5 changes: 5 additions & 0 deletions app/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,11 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
github.com/timtadh/data-structures v0.6.1 h1:76eDpwngj2rEi9r/qvdH6YL7wMXGsoFFzhEylo/IacA=
github.com/timtadh/data-structures v0.6.1/go.mod h1:uYUnI1cQi/5yMCc7s23I+x8Mn8BCMf4WgK+7/4QSEk4=
github.com/timtadh/getopt v1.0.0/go.mod h1:L3EL6YN2G0eIAhYBo9b7SB9d/kEQmdnwthIlMJfj210=
github.com/timtadh/lexmachine v0.2.3 h1:ZqlfHnfMcAygtbNM5Gv7jQf8hmM8LfVzDjfCrq235NQ=
github.com/timtadh/lexmachine v0.2.3/go.mod h1:oK1NW+93fQSIF6s+J6sXBFWsCPCFbNmrwKV1i0aqvW0=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
Expand Down
257 changes: 257 additions & 0 deletions app/pkg/executor/bashish.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
package executor

import (
"strings"

"github.com/go-cmd/cmd"
"github.com/go-logr/zapr"
"github.com/jlewi/hydros/pkg/util"
"github.com/pkg/errors"
"github.com/timtadh/lexmachine"
"github.com/timtadh/lexmachine/machines"
"go.uber.org/zap"
)

// TokenType identifies the lexical class of a token produced by the
// bashish lexer.
type TokenType string

const (
	// PipeToken is the "|" character separating piped commands.
	PipeToken TokenType = "PIPE"
	// QuoteToken is a single- or double-quote character.
	QuoteToken TokenType = "QUOTE"
	// UnmatchedToken marks input that no lexer rule matched; it is
	// synthesized in toTokens from the scanner's unconsumed-input errors.
	UnmatchedToken TokenType = "UNMATCHED"
	// TextToken is a run of word characters (matched by `\w+`).
	TextToken TokenType = "TEXT"
	// WhiteSpaceToken is a run of whitespace characters (matched by `\s+`).
	WhiteSpaceToken TokenType = "WHITESPACE"
)

// BashishParser is a parser for the bashish language.
// Bashish is a language that is a very simple subset of bash. It is basically
// shell commands plus the ability to do things like pipe the output of one command to another.
type BashishParser struct {
	// l is the compiled lexer used to tokenize each input line.
	l *lexmachine.Lexer
}

// NewBashishParser creates a new parser for the bashish language.
func NewBashishParser() (*BashishParser, error) {
	// Build the lexer for the bashish language.
	lexer := lexmachine.NewLexer()

	// Important details about how the lexer works; keep these in mind when
	// editing the rules below.
	//
	// 1. The lexer prefers lower precedence matches that are longer, so avoid
	//    overly broad patterns.
	// 2. The lexer compiles regular expressions to a DFA rather than using
	//    Go's regexp library, so the full regexp syntax is not supported.
	//    Notably, not all character classes are supported; see
	//    https://github.com/timtadh/lexmachine#built-in-character-classes.
	// 3. Per https://github.com/timtadh/lexmachine/issues/34, lexmachine
	//    can't expand character classes within character classes; e.g.
	//    `[\w?=]` won't work. The workaround is to expand \w manually,
	//    e.g. `[A-Za-z0-9_?=]`.

	// Rule order matters for precedence; register them in a fixed sequence.
	rules := []struct {
		pattern string
		token   TokenType
	}{
		{`\w+`, TextToken},
		{`\s+`, WhiteSpaceToken},
		{`['"]`, QuoteToken},
		{`\|`, PipeToken},
	}
	for _, r := range rules {
		lexer.Add([]byte(r.pattern), NewTokenAction(r.token))
	}

	// Characters not matched by any rule are handled in toTokens, which
	// converts them into UnmatchedToken tokens.
	if err := lexer.Compile(); err != nil {
		return nil, errors.Wrapf(err, "Failed to compile the lexer")
	}
	return &BashishParser{
		l: lexer,
	}, nil
}

// Parse parses a multiline string into a sequence of instructions.
// Each line is trimmed and tokenized independently; the resulting
// instructions from all lines are returned in order.
func (p *BashishParser) Parse(doc string) ([]Instruction, error) {
	instructions := make([]Instruction, 0, 10)
	for _, line := range strings.Split(doc, "\n") {
		trimmed := strings.TrimSpace(line)
		tokens, err := p.toTokens([]byte(trimmed))
		if err != nil {
			return nil, err
		}

		// Each line gets a fresh state machine; quoting never spans lines.
		ip := instructionParser{
			insideQuote: false,
			fields:      make([]string, 0, len(tokens)),
			quoteChar:   "",
			newField:    "",
		}
		parsed, err := ip.parse(tokens)
		if err != nil {
			return nil, err
		}
		instructions = append(instructions, parsed...)
	}
	return instructions, nil
}

// toTokens turns the provided input into a stream of tokens.
//
// Input that no lexer rule matches is not treated as an error: the scanner's
// unconsumed-input errors are converted into UnmatchedToken tokens so the
// instruction parser can treat those characters as literal text.
func (p *BashishParser) toTokens(inBytes []byte) ([]*Token, error) {
	scanner, err := p.l.Scanner(inBytes)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to initialize the scanner")
	}
	tokens := make([]*Token, 0, 50)
	log := zapr.NewLogger(zap.L())
	for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
		unErr := &machines.UnconsumedInput{}
		if errors.As(err, &unErr) {
			// The scanner stopped on characters no rule matches. Advance past
			// them and emit them as a single UnmatchedToken.
			scanner.TC = unErr.FailTC
			text := unErr.Text[unErr.StartTC:unErr.FailTC]
			log.V(util.Debug).Info("lexer returned unconsumed token", "text", string(text))

			tokens = append(tokens, &Token{
				TokenType: UnmatchedToken,
				Lexeme:    string(text),
				Match: &machines.Match{
					Bytes: text,
				},
			})
			continue
		}
		if err != nil {
			return nil, err
		}

		token, ok := tok.(*Token)
		if !ok {
			return nil, errors.New("token isn't of type token")
		}
		tokens = append(tokens, token)
	}
	return tokens, nil
}

// instructionParser is a state machine for parsing a string of tokens.
type instructionParser struct {
	// insideQuote is true while the parser is between an opening quote and
	// its matching closing quote.
	insideQuote bool
	// fields holds the completed fields (command name and arguments) of the
	// instruction currently being assembled.
	fields []string
	// newField accumulates the characters of the field currently being built.
	newField string
	// To handle nested quotes we need to keep track of the quote character
	// that opened the current quotation.
	quoteChar string
}

// parse parses a sequence of tokens into a sequence of instructions.
//
// Fields are separated by unquoted whitespace and instructions by unquoted
// pipes; quote handling (including escapes) is delegated to handleQuoteToken.
func (p *instructionParser) parse(tokens []*Token) ([]Instruction, error) {
	instructions := make([]Instruction, 0, len(tokens))
	for _, token := range tokens {
		val := string(token.Match.Bytes)
		switch token.TokenType {
		case PipeToken:
			if p.insideQuote {
				// A quoted pipe is literal text.
				p.newField += val
			} else {
				// Flush any field still being accumulated; otherwise text
				// directly adjacent to the pipe (e.g. "ls -la| wc") would
				// leak into the next instruction.
				if len(p.newField) > 0 {
					p.fields = append(p.fields, p.newField)
					p.newField = ""
				}
				if len(p.fields) > 0 {
					// Complete the instruction
					i := Instruction{
						Command: cmd.NewCmd(p.fields[0], p.fields[1:]...),
						Piped:   true,
					}
					instructions = append(instructions, i)
					p.fields = make([]string, 0, len(tokens))
				} else {
					// A pipe with no preceding command is kept as literal text.
					p.newField += val
				}
			}
		case QuoteToken:
			p.handleQuoteToken(val)
		case TextToken:
			p.newField += val
		case UnmatchedToken:
			p.newField += val
		case WhiteSpaceToken:
			if !p.insideQuote {
				// Unquoted whitespace terminates the current field.
				if len(p.newField) > 0 {
					p.fields = append(p.fields, p.newField)
					p.newField = ""
				}
			} else {
				p.newField += val
			}
		default:
			return nil, errors.Errorf("parse encountered unknown token type %v", token.TokenType)
		}
	}

	// Any remaining fields should be rolled up into a final instruction.
	if len(p.newField) > 0 {
		p.fields = append(p.fields, p.newField)
	}
	if len(p.fields) > 0 {
		i := Instruction{
			Command: cmd.NewCmd(p.fields[0], p.fields[1:]...),
			Piped:   false,
		}
		instructions = append(instructions, i)
	}
	return instructions, nil
}

// handleQuoteToken updates the parser state in response to a single quote
// character val (either ' or ").
func (p *instructionParser) handleQuoteToken(val string) {
	// A backslash immediately before the quote escapes it: drop the
	// backslash and keep the quote as literal text.
	if strings.HasSuffix(p.newField, "\\") {
		p.newField = strings.TrimSuffix(p.newField, "\\") + val
		return
	}

	if p.insideQuote && p.quoteChar != val {
		// A different quote character inside a quotation does not close it,
		// so it is kept as literal text.
		p.newField += val
		return
	}

	// Emulate shell behavior: the quote characters themselves are not part
	// of the field. For example, the shell command
	//   echo "hello world"
	// is equal to []string{"echo", "hello world"}, not
	// []string{"echo", "\"hello world\""}.
	if p.insideQuote {
		// Closing quote: the accumulated text becomes a completed field.
		p.fields = append(p.fields, p.newField)
		p.newField = ""
		p.quoteChar = ""
		p.insideQuote = false
		return
	}

	// Opening quote: remember which character must close it.
	p.quoteChar = val
	p.insideQuote = true
}

// Token is a single lexical token produced by the bashish lexer.
type Token struct {
	// TokenType is the lexical class of the token.
	TokenType TokenType
	// Lexeme is the matched text as a string.
	Lexeme string
	// Match is the underlying lexer match; Match.Bytes holds the raw bytes.
	Match *machines.Match
}

// NewToken constructs a Token of the given type from a lexer match,
// recording the matched bytes as the token's lexeme.
func NewToken(tokenType TokenType, m *machines.Match) *Token {
	return &Token{
		TokenType: tokenType,
		Lexeme:    string(m.Bytes),
		Match:     m,
	}
}

// NewTokenAction creates a lexmachine action for the given TokenType.
// The returned action wraps each lexer match in a *Token carrying that type.
func NewTokenAction(t TokenType) lexmachine.Action {
	return func(scan *lexmachine.Scanner, match *machines.Match) (interface{}, error) {
		return NewToken(t, match), nil
	}
}

// Instruction represents one instruction in the bashish language.
// This is typically a command that should be executed. In addition it contains information about
// how that command should be executed; e.g. if the output of this command should be piped to the next command.
type Instruction struct {
	// Command is the command to run; its Name is the binary and Args its arguments.
	Command *cmd.Cmd

	// Piped should be set to true if the output of this command should be piped to the next instruction.
	Piped bool
}
112 changes: 112 additions & 0 deletions app/pkg/executor/bashish_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package executor

import (
"strings"
"testing"

"github.com/go-cmd/cmd"
"github.com/google/go-cmp/cmp"
)

// Test_BashishParser verifies that BashishParser.Parse splits documents into
// the expected instructions, covering plain commands, quoting, pipes, quoted
// pipes, and nested (escaped) quotes.
func Test_BashishParser(t *testing.T) {
	type testCase struct {
		name     string
		lines    []string
		expected []Instruction
	}

	cases := []testCase{
		{
			name:  "basic",
			lines: []string{"kubectl get pods"},
			expected: []Instruction{
				{
					Command: cmd.NewCmd("kubectl", "get", "pods"),
				},
			},
		},
		{
			// This text mimics what you would get if you typed the command into a shell
			name:  "quoted",
			lines: []string{"echo \"some text\""},
			expected: []Instruction{
				{
					Command: cmd.NewCmd("echo", "some text"),
				},
			},
		},
		{
			name:  "simple-pipe",
			lines: []string{"ls -la | wc -l"},
			expected: []Instruction{
				{
					Command: cmd.NewCmd("ls", "-la"),
					Piped:   true,
				},
				{
					Command: cmd.NewCmd("wc", "-l"),
				},
			},
		},
		{
			name:  "pipe-quoted",
			lines: []string{`kubectl get pods --format=yaml | jq 'select(.conditions[]) | .status'`},
			expected: []Instruction{
				{
					Command: cmd.NewCmd("kubectl", "get", "pods", "--format=yaml"),
					Piped:   true,
				},
				{
					Command: cmd.NewCmd("jq", `select(.conditions[]) | .status`),
				},
			},
		},
		{
			name:  "nested-quotes",
			lines: []string{`gcloud logging read "resource.labels.project_id=\"foyle-dev\" resource.type=\"k8s_container\" resource.labels.location=\"us-west1\" resource.labels.cluster_name=\"dev\"" --project=foyle-dev`},
			expected: []Instruction{
				{
					Command: cmd.NewCmd("gcloud", "logging", "read", "resource.labels.project_id=\"foyle-dev\" resource.type=\"k8s_container\" resource.labels.location=\"us-west1\" resource.labels.cluster_name=\"dev\"", "--project=foyle-dev"),
				},
			},
		},
	}

	// The parser is stateless across Parse calls, so one instance is shared
	// by all subtests.
	parser, err := NewBashishParser()
	if err != nil {
		t.Fatalf("NewBashishParser() returned error %v", err)
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			doc := strings.Join(c.lines, "\n")
			actual, err := parser.Parse(doc)
			if err != nil {
				t.Fatalf("unexpected parsing error %v", err)
			}
			if len(actual) != len(c.expected) {
				t.Errorf("Expected %v instructions got %v", len(c.expected), len(actual))
			}

			for i, eInstruction := range c.expected {
				if i >= len(actual) {
					break
				}

				aInstruction := actual[i]

				if aInstruction.Command.Name != eInstruction.Command.Name {
					t.Errorf("Expected command.Name to be %v got %v", eInstruction.Command.Name, aInstruction.Command.Name)
				}
				// Use Errorf (not Fatalf) so a mismatch here still lets the
				// Piped check below run and be reported.
				if d := cmp.Diff(eInstruction.Command.Args, aInstruction.Command.Args); d != "" {
					t.Errorf("Unexpected args (-want +got): %v", d)
				}

				if aInstruction.Piped != eInstruction.Piped {
					t.Errorf("Expected Piped to be %v got %v", eInstruction.Piped, aInstruction.Piped)
				}
			}
		})
	}
}
Loading
Loading