Implement the executor (#30)
* The executor actually executes bash cells
* We create a lexer to parse the code cells into commands
* We call this lexer "bashish" because it's a very small subset of the
bash language; mainly just the ability to execute commands

* Fix a bug in the gRPC gateway annotations. We need to set body to "*";
otherwise the mappings aren't done correctly.
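
As a quick illustration of what the parser produces, here is a minimal sketch (the executor import path is an assumption, since the module path isn't visible on this page; the input and expected output mirror the simple-pipe test case below):

package main

import (
	"fmt"

	// Assumed import path; the module path isn't visible on this page.
	"github.com/jlewi/foyle/app/pkg/executor"
)

func main() {
	parser, err := executor.NewBashishParser()
	if err != nil {
		panic(err)
	}
	// "ls -la | wc -l" parses into two instructions; the first is marked
	// Piped so the executor knows to feed its output into the second.
	instructions, err := parser.Parse("ls -la | wc -l")
	if err != nil {
		panic(err)
	}
	for _, instruction := range instructions {
		fmt.Println(instruction.Command.Name, instruction.Command.Args, "piped:", instruction.Piped)
	}
}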
jlewi authored Apr 8, 2024
1 parent af93946 commit 69bd015
Showing 14 changed files with 3,480 additions and 45 deletions.
2 changes: 2 additions & 0 deletions app/go.mod
@@ -16,6 +16,7 @@ require (
github.com/pkg/errors v0.9.1
github.com/spf13/cobra v1.8.0
github.com/spf13/viper v1.18.2
github.com/timtadh/lexmachine v0.2.3
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.1
go.uber.org/zap v1.27.0
google.golang.org/grpc v1.62.1
@@ -111,6 +112,7 @@ require (
github.com/spf13/cast v1.6.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/timtadh/data-structures v0.6.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/vbatts/tar-split v0.11.3 // indirect
5 changes: 5 additions & 0 deletions app/go.sum
@@ -329,6 +329,11 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
github.com/timtadh/data-structures v0.6.1 h1:76eDpwngj2rEi9r/qvdH6YL7wMXGsoFFzhEylo/IacA=
github.com/timtadh/data-structures v0.6.1/go.mod h1:uYUnI1cQi/5yMCc7s23I+x8Mn8BCMf4WgK+7/4QSEk4=
github.com/timtadh/getopt v1.0.0/go.mod h1:L3EL6YN2G0eIAhYBo9b7SB9d/kEQmdnwthIlMJfj210=
github.com/timtadh/lexmachine v0.2.3 h1:ZqlfHnfMcAygtbNM5Gv7jQf8hmM8LfVzDjfCrq235NQ=
github.com/timtadh/lexmachine v0.2.3/go.mod h1:oK1NW+93fQSIF6s+J6sXBFWsCPCFbNmrwKV1i0aqvW0=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
257 changes: 257 additions & 0 deletions app/pkg/executor/bashish.go
@@ -0,0 +1,257 @@
package executor

import (
"strings"

"github.com/go-cmd/cmd"
"github.com/go-logr/zapr"
"github.com/jlewi/hydros/pkg/util"
"github.com/pkg/errors"
"github.com/timtadh/lexmachine"
"github.com/timtadh/lexmachine/machines"
"go.uber.org/zap"
)

type TokenType string

const (
PipeToken TokenType = "PIPE"
QuoteToken TokenType = "QUOTE"
UnmatchedToken TokenType = "UNMATCHED"
TextToken TokenType = "TEXT"
WhiteSpaceToken TokenType = "WHITESPACE"
)

// BashishParser is a parser for the bashish language.
// Bashish is a very small subset of bash: basically shell commands plus the
// ability to do things like pipe the output of one command to another.
type BashishParser struct {
l *lexmachine.Lexer
}

// NewBashishParser creates a new parser for the bashish language.
func NewBashishParser() (*BashishParser, error) {
// We need to construct a lexer for the bashish language.
l := lexmachine.NewLexer()

// Here are a couple of important details about how the lexer works. Keep these in mind when constructing the rules.
//
// 1. The lexer prefers lower precedence matches that are longer. So be careful about having
// matches that are overly broad.
// 2. The lexer compiles the regular expressions to a DFA; it doesn't use GoLang's regexp library.
// As a result, the full regexp syntax is not supported. Notably, not all character classes are supported.
// For a list of supported classes see https://github.com/timtadh/lexmachine#built-in-character-classes.
// 3. Another major limitation is https://github.com/timtadh/lexmachine/issues/34: lexmachine can't expand
// character classes within character classes. As an example `[\w?=]` won't work. A workaround is to expand \w
// manually e.g. `[A-Za-z0-9_?=]`

l.Add([]byte(`\w+`), NewTokenAction(TextToken))
l.Add([]byte(`\s+`), NewTokenAction(WhiteSpaceToken))
l.Add([]byte(`['"]`), NewTokenAction(QuoteToken))
l.Add([]byte(`\|`), NewTokenAction(PipeToken))
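
// As an illustration of the rules above, the line `echo "hi" | wc` lexes to
// TEXT("echo") WHITESPACE QUOTE TEXT("hi") QUOTE WHITESPACE PIPE WHITESPACE
// TEXT("wc"). A character such as `-` matches none of these rules; toTokens
// below surfaces it as an UNMATCHED token rather than failing.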

// We rely on the toTokens function to turn unmatched characters into UnmatchedTokens.
if err := l.Compile(); err != nil {
return nil, errors.Wrapf(err, "Failed to compile the lexer")
}
return &BashishParser{
l: l,
}, nil
}

// Parse parses a multiline string into a sequence of commands.
func (p *BashishParser) Parse(doc string) ([]Instruction, error) {
lines := strings.Split(doc, "\n")

instructions := make([]Instruction, 0, 10)
for _, line := range lines {
line = strings.TrimSpace(line)
tokens, err := p.toTokens([]byte(line))
if err != nil {
return nil, err
}

iParser := instructionParser{
insideQuote: false,
fields: make([]string, 0, len(tokens)),
quoteChar: "",
newField: "",
}

newInstructions, err := iParser.parse(tokens)
if err != nil {
return nil, err
}
instructions = append(instructions, newInstructions...)
}
return instructions, nil
}

// toTokens turns the provided input into a stream of tokens.
func (p *BashishParser) toTokens(inBytes []byte) ([]*Token, error) {
scanner, err := p.l.Scanner(inBytes)
if err != nil {
return nil, errors.Wrapf(err, "failed to initialize the scanner")
}
tokens := make([]*Token, 0, 50)
log := zapr.NewLogger(zap.L())
for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
unErr := &machines.UnconsumedInput{}
isUnconsumed := errors.As(err, &unErr)
if isUnconsumed {
// The scanner hit input that none of the rules match. Skip past it and
// emit the unmatched text as an UnmatchedToken.
scanner.TC = unErr.FailTC
text := unErr.Text[unErr.StartTC:unErr.FailTC]
log.V(util.Debug).Info("lexer returned unconsumed token", "text", string(text))

newToken := &Token{
TokenType: UnmatchedToken,
Lexeme: string(text),
Match: &machines.Match{
Bytes: text,
},
}
tokens = append(tokens, newToken)
continue
} else if err != nil {
return nil, err
}

token, ok := tok.(*Token)
if !ok {
return nil, errors.New("token isn't of type *Token")
}
tokens = append(tokens, token)
}
return tokens, nil
}

// instructionParser is a state machine for parsing a string of tokens.
type instructionParser struct {
insideQuote bool
fields []string
newField string
// To handle nested quotes we need to keep track of the quote character
quoteChar string
}

// parse parses a sequence of tokens into a sequence of instructions.
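// For example, given the lexer rules in NewBashishParser, the line
// echo "a b"
// arrives as TEXT("echo") WHITESPACE QUOTE TEXT("a") WHITESPACE TEXT("b") QUOTE.
// TEXT accumulates into newField; the WHITESPACE outside the quote flushes
// "echo" into fields; the opening QUOTE records the quote character; the
// whitespace inside the quote is preserved; and the closing QUOTE flushes
// "a b" as a single field.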
func (p *instructionParser) parse(tokens []*Token) ([]Instruction, error) {
instructions := make([]Instruction, 0, len(tokens))
for _, token := range tokens {
val := string(token.Match.Bytes)
switch token.TokenType {
case PipeToken:
if !p.insideQuote && (len(p.fields) > 0 || len(p.newField) > 0) {
// Flush any in-progress field so an argument isn't lost when the
// pipe isn't surrounded by whitespace, then complete the instruction.
if len(p.newField) > 0 {
p.fields = append(p.fields, p.newField)
p.newField = ""
}
i := Instruction{
Command: cmd.NewCmd(p.fields[0], p.fields[1:]...),
Piped: true,
}
instructions = append(instructions, i)
p.fields = make([]string, 0, len(tokens))
} else {
p.newField += val
}
case QuoteToken:
p.handleQuoteToken(val)
case TextToken:
p.newField += val
case UnmatchedToken:
p.newField += val
case WhiteSpaceToken:
if !p.insideQuote {
if len(p.newField) > 0 {
p.fields = append(p.fields, p.newField)
p.newField = ""
}
} else {
p.newField += val
}
default:
return nil, errors.Errorf("parse encoutered unknown token type %v", token.TokenType)
}
}

// Any remaining fields should be rolled up into a final instruction.
if len(p.newField) > 0 {
p.fields = append(p.fields, p.newField)
}
if len(p.fields) > 0 {
i := Instruction{
Command: cmd.NewCmd(p.fields[0], p.fields[1:]...),
Piped: false,
}
instructions = append(instructions, i)
}
return instructions, nil
}

func (p *instructionParser) handleQuoteToken(val string) {
lastChar := ""
if len(p.newField) > 0 {
lastChar = string(p.newField[len(p.newField)-1])
}

if lastChar == "\\" {
// Since backslash is an escape character we remove it and add the quote
p.newField = p.newField[:len(p.newField)-1]
p.newField += val
return
}
if p.insideQuote && p.quoteChar != val {
// We encountered a quote within a quote but it is a different quote character
// so we aren't closing the quotation. So just add it to the field
p.newField += val
return
}
// We emulate the shell behavior. In particular, we don't include the quotes in the field.
// For example, suppose we have the shell command
// echo "hello world"
// This is equal to []string{"echo", "hello world"} not
// []string{"echo", "\"hello world\""}

if p.insideQuote {
// Close the quote by adding the field
p.fields = append(p.fields, p.newField)
p.newField = ""
p.quoteChar = ""
p.insideQuote = false
} else {
// Start a quotation
p.quoteChar = val
p.insideQuote = true
}
}

type Token struct {
TokenType TokenType
Lexeme string
Match *machines.Match
}

func NewToken(tokenType TokenType, m *machines.Match) *Token {
return &Token{
TokenType: tokenType,
Lexeme: string(m.Bytes),
Match: m,
}
}

// NewTokenAction creates a lexmachine action for the given TokenType.
func NewTokenAction(t TokenType) lexmachine.Action {
return func(scan *lexmachine.Scanner, match *machines.Match) (interface{}, error) {
return NewToken(t, match), nil
}
}

// Instruction represents one instruction in the bashish language.
// This is typically a command that should be executed. In addition it contains information about
// how that command should be executed; e.g. if the output of this command should be piped to the next command.
type Instruction struct {
Command *cmd.Cmd

// Piped should be set to true if the output of this command should be piped to the next instruction.
Piped bool
}
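
The executor that consumes these instructions isn't shown in this excerpt. Below is a rough, hypothetical sketch of how the Piped flag could be interpreted when running a pipeline; it uses os/exec for simplicity rather than go-cmd, and the executor import path is again an assumption:

package main

import (
	"bytes"
	"fmt"
	"os/exec"

	// Assumed import path; the module path isn't visible on this page.
	"github.com/jlewi/foyle/app/pkg/executor"
)

// runInstructions runs each instruction in order. When an instruction is
// marked Piped, its stdout is buffered and fed to the next command's stdin.
func runInstructions(instructions []executor.Instruction) error {
	var prevOut []byte
	for _, instruction := range instructions {
		c := exec.Command(instruction.Command.Name, instruction.Command.Args...)
		if prevOut != nil {
			c.Stdin = bytes.NewReader(prevOut)
		}
		out, err := c.Output()
		if err != nil {
			return err
		}
		if instruction.Piped {
			prevOut = out // hand this output to the next command
		} else {
			prevOut = nil
			fmt.Print(string(out))
		}
	}
	return nil
}

func main() {
	parser, err := executor.NewBashishParser()
	if err != nil {
		panic(err)
	}
	instructions, err := parser.Parse("ls -la | wc -l")
	if err != nil {
		panic(err)
	}
	if err := runInstructions(instructions); err != nil {
		panic(err)
	}
}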
112 changes: 112 additions & 0 deletions app/pkg/executor/bashish_test.go
@@ -0,0 +1,112 @@
package executor

import (
"strings"
"testing"

"github.com/go-cmd/cmd"
"github.com/google/go-cmp/cmp"
)

func Test_BashishParser(t *testing.T) {
type testCase struct {
name string
lines []string
expected []Instruction
}

cases := []testCase{
{
name: "basic",
lines: []string{"kubectl get pods"},
expected: []Instruction{
{
Command: cmd.NewCmd("kubectl", "get", "pods"),
},
},
},
{
// This text mimics what you would get if you typed the command into a shell
name: "quoted",
lines: []string{"echo \"some text\""},
expected: []Instruction{
{
Command: cmd.NewCmd("echo", "some text"),
},
},
},
{
name: "simple-pipe",
lines: []string{"ls -la | wc -l"},
expected: []Instruction{
{
Command: cmd.NewCmd("ls", "-la"),
Piped: true,
},
{
Command: cmd.NewCmd("wc", "-l"),
},
},
},
{
name: "pipe-quoted",
lines: []string{`kubectl get pods --format=yaml | jq 'select(.conditions[]) | .status'`},
expected: []Instruction{
{
Command: cmd.NewCmd("kubectl", "get", "pods", "--format=yaml"),
Piped: true,
},
{
Command: cmd.NewCmd("jq", `select(.conditions[]) | .status`),
},
},
},
{
name: "nested-quotes",
lines: []string{`gcloud logging read "resource.labels.project_id=\"foyle-dev\" resource.type=\"k8s_container\" resource.labels.location=\"us-west1\" resource.labels.cluster_name=\"dev\"" --project=foyle-dev`},
expected: []Instruction{
{
Command: cmd.NewCmd("gcloud", "logging", "read", "resource.labels.project_id=\"foyle-dev\" resource.type=\"k8s_container\" resource.labels.location=\"us-west1\" resource.labels.cluster_name=\"dev\"", "--project=foyle-dev"),
},
},
},
}

parser, err := NewBashishParser()

if err != nil {
t.Fatalf("NewBashishParser() returned error %v", err)
}

for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
doc := strings.Join(c.lines, "\n")
actual, err := parser.Parse(doc)
if err != nil {
t.Fatalf("unexpected parsing error %v", err)
}
if len(actual) != len(c.expected) {
t.Errorf("Expected %v instructions got %v", len(c.expected), len(actual))
}

for i, eInstruction := range c.expected {
if i >= len(actual) {
break
}

aInstruction := actual[i]

if aInstruction.Command.Name != eInstruction.Command.Name {
t.Errorf("Expected command.Name to be %v got %v", eInstruction.Command.Name, aInstruction.Command.Name)
}
if d := cmp.Diff(eInstruction.Command.Args, aInstruction.Command.Args); d != "" {
t.Fatalf("Unexpected args (-want +got): %v", d)
}

if aInstruction.Piped != eInstruction.Piped {
t.Errorf("Expected Piped to be %v got %v", eInstruction.Piped, aInstruction.Piped)
}
}
})
}
}