From 634a66df0e13f32f1869aeaa6795fdb52e8ad397 Mon Sep 17 00:00:00 2001 From: Mitchell Paulus Date: Sun, 15 Dec 2024 09:12:20 -0600 Subject: [PATCH] Adding syntax for definition typing (#16) --- DEVELOP.md | 4 +- lib/std.msh | 46 +++---- mshell/Lexer.go | 11 +- mshell/MShellObject.go | 8 +- mshell/Parser.go | 303 ++++++++++++++++++++++++++++++++++++++++- tests/indexing.msh | 2 +- tests/simple_def.msh | 4 +- 7 files changed, 339 insertions(+), 39 deletions(-) diff --git a/DEVELOP.md b/DEVELOP.md index 009e6f2..807bfc0 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -69,8 +69,8 @@ typeItem typeQuote : '(' type* -- type* ')' ; typeList : homogeneousList | heterogeneousList ; -homogeneousList : '[' type '*' ']' ; -heterogeneousList : '[' type+ ']' ; +homogeneousList : '[' type ']' ; +heterogeneousList : '&' '[' type* ']' ; ``` Key Types: diff --git a/lib/std.msh b/lib/std.msh index 3f7cf23..bc4930c 100644 --- a/lib/std.msh +++ b/lib/std.msh @@ -2,8 +2,7 @@ # type numeric int | float -# each (list quote: (item --) --) -def each +def each ([T] (T --) --) over len each-len! # Get total length 0 each-idx! # index ( @@ -20,13 +19,7 @@ def each end # map (list quote -- list) -def map -# ( -# [T*]: list -# (T -- U): q -# -- -# [U*] -# ) +def map ([T] (T -- U) -- [U]) over len map-len! # Get total length 0 map-idx! # Index [] map-accum! # Accumulator @@ -45,7 +38,7 @@ def map end # filter (list quote -- list) -def filter +def filter ([T] (T -- bool) -- [T]) over len filter-len! # Get total length 0 filter-idx! # Index [] filter-accum! # Accumulator @@ -64,7 +57,7 @@ def filter end # foldl (quote initial list -- result) -def foldl +def foldl ((T T -- T) T [T] -- T) # quote initial list swap foldl-accum! # Accumulator, # quote list @@ -80,24 +73,24 @@ def foldl end # sum (list -- value) -def sum +def sum ([float] -- float) (+) 0 rot foldl end # .. (-- list[str]), equals kinda looks like lines. -def .. stdin lines end +def .. (-- [str]) stdin lines end # tt = Tab separated Table (-- list[list[str]]) -def tt .. (" " split) map end +def tt (-- [[str]]) .. (" " split) map end -# wt = Whitespace separated Table (-- list[list[str]]) -def wt .. (wsplit) map end +# wt = Whitespace separated Table (-- [[str*]*]) +def wt (-- [[str]]) .. (wsplit) map end -# wjoin = Whitespace join (list -- str) -def wjoin " " join end +# wjoin = Whitespace join ([str*] -- str) +def wjoin ([str] -- str) " " join end # unlines (list[str] -- str) -def unlines +def unlines ([str] -- str) [] unlines-accum! # Accumulator ( @unlines-accum append @@ -107,8 +100,8 @@ def unlines @unlines-accum "" join end -# reverse (list|string -- list) -def reverse +# reverse ([T*]|str -- [T*]|str) +def reverse ([T] -- [T]) dup len 1 - reverse-idx! # Set Idx [] # list accum ( @@ -120,16 +113,15 @@ def reverse end # abs (int|float -- int|float) -def abs +def abs (float -- float) [(dup 0 <) (-1 *)] if end -def tab " " end -def tsplit tab split end -def uw unlines w end +def tab (-- str) " " end +def tsplit (str -- [str]) tab split end +def uw ([str] --) unlines w end -# readTsvFile (str -- list[list[str]]) -def readTsvFile +def readTsvFile (str -- [[str]]) readFile lines (tsplit) map end end diff --git a/mshell/Lexer.go b/mshell/Lexer.go index e8a84eb..9ad09bd 100644 --- a/mshell/Lexer.go +++ b/mshell/Lexer.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "unicode" + "encoding/json" ) type TokenType int @@ -62,6 +63,7 @@ const ( // TYPESTRING, using str token instead TYPEBOOL DOUBLEDASH + AMPERSAND ) func (t TokenType) String() string { @@ -170,6 +172,8 @@ func (t TokenType) String() string { return "TYPEBOOL" case DOUBLEDASH: return "DOUBLEDASH" + case AMPERSAND: + return "AMPERSAND" default: return "UNKNOWN" } @@ -185,7 +189,8 @@ type Token struct { } func (t Token) ToJson() string { - return fmt.Sprintf("{\"line\": %d, \"column\": %d, \"start\": %d, \"lexeme\": \"%s\", \"type\": \"%s\"}", t.Line, t.Column, t.Start, t.Lexeme, t.Type) + escaped, _ := json.Marshal(t.Lexeme) + return fmt.Sprintf("{\"line\": %d, \"column\": %d, \"start\": %d, \"lexeme\": %s, \"type\": \"%s\"}", t.Line, t.Column, t.Start, string(escaped), t.Type) } func (t Token) DebugString() string { @@ -268,6 +273,8 @@ var notAllowedLiteralChars = map[rune]bool{ '!': true, '@': true, '=': true, + '&': true, + '|': true, } func isAllowedLiteral(r rune) bool { @@ -447,6 +454,8 @@ func (l *Lexer) scanToken() Token { return l.parseLiteralOrKeyword() case '=': return l.makeToken(EQUALS) + case '&': + return l.makeToken(AMPERSAND) case '<': if l.peek() == '=' { l.advance() diff --git a/mshell/MShellObject.go b/mshell/MShellObject.go index fef07a9..f9deaae 100644 --- a/mshell/MShellObject.go +++ b/mshell/MShellObject.go @@ -4,6 +4,7 @@ import ( "fmt" "strconv" "strings" + "encoding/json" ) type Jsonable interface { @@ -752,7 +753,8 @@ func (obj *MShellSimple) Slice(startInc int, endExc int) (MShellObject, error) { // ToJson func (obj *MShellLiteral) ToJson() string { - return fmt.Sprintf("{\"type\": \"Literal\", \"value\": \"%s\"}", obj.LiteralText) + escBytes, _ := json.Marshal(obj.LiteralText) + return fmt.Sprintf("{\"type\": \"Literal\", \"value\": \"%s\"}", string(escBytes)) } func (obj *MShellBool) ToJson() string { @@ -788,7 +790,9 @@ func (obj *MShellList) ToJson() string { } func (obj *MShellString) ToJson() string { - return fmt.Sprintf("{\"type\": \"String\", \"content\": \"%s\"}", obj.Content) + // Escape the content + escBytes, _ := json.Marshal(obj.Content) + return fmt.Sprintf("{\"type\": \"String\", \"content\": %s}", string(escBytes)) } func (obj *MShellPipe) ToJson() string { diff --git a/mshell/Parser.go b/mshell/Parser.go index a2ab636..daaff44 100644 --- a/mshell/Parser.go +++ b/mshell/Parser.go @@ -66,10 +66,11 @@ func (quote *MShellParseQuote) DebugString() string { type MShellDefinition struct { Name string Items []MShellParseItem + TypeDef TypeDefinition } func (def *MShellDefinition) ToJson() string { - return fmt.Sprintf("{\"name\": \"%s\", \"items\": %s}", def.Name, ToJson(def.Items)) + return fmt.Sprintf("{\"name\": \"%s\", \"items\": %s, \"type\": %s }", def.Name, ToJson(def.Items), def.TypeDef.ToJson()) } func ToJson(objList []MShellParseItem) string { @@ -86,6 +87,23 @@ func ToJson(objList []MShellParseItem) string { return builder.String() } +func TypeListToJson(typeList []MShellType) string { + if len(typeList) == 0 { + return "[]" + } + + builder := strings.Builder{} + builder.WriteString("[") + builder.WriteString(typeList[0].ToJson()) + for i := 1; i < len(typeList); i++ { + builder.WriteString(", ") + builder.WriteString(typeList[i].ToJson()) + } + builder.WriteString("]") + return builder.String() +} + + func (file *MShellFile) ToJson() string { // Start builder for definitions definitions := strings.Builder{} @@ -123,13 +141,23 @@ func (parser *MShellParser) NextToken() { func (parser *MShellParser) Match(token Token, tokenType TokenType) error { if token.Type != tokenType { - message := fmt.Sprintf("Expected %s, got %s", tokenType, token.Type) + message := fmt.Sprintf("%d:%d: Expected %s, got %s", token.Line, token.Column, tokenType, token.Type) return errors.New(message) } parser.NextToken() return nil } +func (parser *MShellParser) MatchWithMessage(token Token, tokenType TokenType, message string) error { + if token.Type != tokenType { + message := fmt.Sprintf("%d:%d: %s", token.Line, token.Column, message) + return errors.New(message) + } + parser.NextToken() + return nil +} + + func (parser *MShellParser) ParseFile() (*MShellFile, error) { file := &MShellFile{} @@ -148,12 +176,18 @@ func (parser *MShellParser) ParseFile() (*MShellFile, error) { case DEF: _ = parser.Match(parser.curr, DEF) if parser.curr.Type != LITERAL { - return file, errors.New(fmt.Sprintf("Expected LITERAL, got %s", parser.curr.Type)) + return file, errors.New(fmt.Sprintf("Expected a name for the definition, got %s", parser.curr.Type)) } - def := MShellDefinition{Name: parser.curr.Lexeme, Items: []MShellParseItem{}} + def := MShellDefinition{Name: parser.curr.Lexeme, Items: []MShellParseItem{}, TypeDef: TypeDefinition{}} _ = parser.Match(parser.curr, LITERAL) + typeDef, err := parser.ParseTypeDefinition() + if err != nil { + return file, err + } + def.TypeDef = *typeDef + for { if parser.curr.Type == END { break @@ -185,6 +219,267 @@ func (parser *MShellParser) ParseFile() (*MShellFile, error) { return file, nil } +type MShellType interface { + ToJson() string +} + +type TypeDefinition struct { + InputTypes []MShellType + OutputTypes []MShellType +} + +func (def *TypeDefinition) ToJson() string { + return fmt.Sprintf("{\"input\": %s, \"output\": %s}", TypeListToJson(def.InputTypes), TypeListToJson(def.OutputTypes)) +} + +type TypeGeneric struct { + Name string +} + +func (generic TypeGeneric) ToJson() string { + return fmt.Sprintf("{ \"generic\": \"%s\" }", generic.Name) +} + +type TypeInt struct { } + +func (t TypeInt) ToJson() string { + return "\"int\"" +} + +type TypeFloat struct { } + +func (t TypeFloat) ToJson() string { + return "\"float\"" +} + +type TypeString struct { } + +func (t TypeString) ToJson() string { + return "\"string\"" +} + +type TypeBool struct { } + +func (t TypeBool) ToJson() string { + return "\"bool\"" +} + +type TypeList struct { + ListType MShellType +} + +func (list *TypeList) ToJson() string { + return fmt.Sprintf("{\"list\": %s}", list.ListType.ToJson()) +} + +type TypeTuple struct { + Types []MShellType +} + +func (tuple *TypeTuple) ToJson() string { + return fmt.Sprintf("{\"tuple\": %s}", TypeListToJson(tuple.Types)) +} + +type TypeQuote struct { + InputTypes []MShellType + OutputTypes []MShellType +} + +func (quote *TypeQuote) ToJson() string { + return fmt.Sprintf("{\"input\": %s, \"output\": %s}", TypeListToJson(quote.InputTypes), TypeListToJson(quote.OutputTypes)) +} + +func (parser *MShellParser) ParseTypeDefinition() (*TypeDefinition, error) { + err := parser.MatchWithMessage(parser.curr, LEFT_PAREN, "Expected '(' to start type definition.") + if err != nil { + return nil, err + } + + // Parse first type + inputTypes, err := parser.ParseTypeItems() + if err != nil { + return nil, err + } + + err = parser.Match(parser.curr, DOUBLEDASH) + if err != nil { + return nil, err + } + + outputTypes, err := parser.ParseTypeItems() + if err != nil { + return nil, err + } + + err = parser.Match(parser.curr, RIGHT_PAREN) + + typeDef := TypeDefinition{InputTypes: inputTypes, OutputTypes: outputTypes} + return &typeDef, nil +} + +func (parser *MShellParser) ParseTypeItems() ([]MShellType, error) { + types := []MShellType{} + + forLoop: + for { + switch parser.curr.Type { + case TYPEINT: + types = append(types, TypeInt{}) + parser.NextToken() + case TYPEFLOAT: + types = append(types, TypeInt{}) + parser.NextToken() + case STR: + types = append(types, TypeString{}) + parser.NextToken() + case TYPEBOOL: + types = append(types, TypeBool{}) + parser.NextToken() + case AMPERSAND: + // Parse tuple/heterogeneous list + typeTuple, err := parser.ParseTypeTuple() + if err != nil { + return nil, err + } + types = append(types, typeTuple) + case LEFT_SQUARE_BRACKET: + // Parse list + typeList, err := parser.ParseTypeList() + if err != nil { + return nil, err + } + types = append(types, typeList) + case LEFT_PAREN: + // Parse quote + typeQuote, err := parser.ParseTypeQuote() + if err != nil { + return nil, err + } + types = append(types, typeQuote) + case LITERAL: + // Parse generic + genericType := TypeGeneric{Name: parser.curr.Lexeme} + types = append(types, genericType) + parser.NextToken() + default: + break forLoop + } + } + + return types, nil +} + +func (parser *MShellParser) ParseTypeTuple() (*TypeTuple, error) { + + err := parser.Match(parser.curr, AMPERSAND) + if err != nil { + return nil, err + } + + err = parser.Match(parser.curr, LEFT_SQUARE_BRACKET) + if err != nil { + return nil, err + } + + types := []MShellType{} + for parser.curr.Type != RIGHT_SQUARE_BRACKET { + // Parse type + } + + parser.Match(parser.curr, RIGHT_SQUARE_BRACKET) + typeTuple := TypeTuple{Types: types} + return &typeTuple, nil +} + +func (parser *MShellParser) ParseTypeQuote() (*TypeQuote, error) { + err := parser.Match(parser.curr, LEFT_PAREN) + if err != nil { + return nil, err + } + + // Parse input types + inputTypes, err := parser.ParseTypeItems() + if err != nil { + return nil, err + } + + err = parser.Match(parser.curr, DOUBLEDASH) + + // Parse output types + outputTypes, err := parser.ParseTypeItems() + if err != nil { + return nil, err + } + + err = parser.Match(parser.curr, RIGHT_PAREN) + + typeQuote := TypeQuote{InputTypes: inputTypes, OutputTypes: outputTypes} + return &typeQuote, nil +} + +func (parser *MShellParser) ParseTypeList() (*TypeList, error) { + err := parser.Match(parser.curr, LEFT_SQUARE_BRACKET) + if err != nil { + return nil, err + } + + // Single type list + var listType MShellType + + // Parse type + switch parser.curr.Type { + case TYPEINT: + listType = TypeInt{} + parser.NextToken() + case TYPEFLOAT: + listType = TypeInt{} + parser.NextToken() + case STR: + listType = TypeString{} + parser.NextToken() + case TYPEBOOL: + listType = TypeBool{} + parser.NextToken() + case AMPERSAND: + // Parse tuple/heterogeneous list + typeTuple, err := parser.ParseTypeTuple() + if err != nil { + return nil, err + } + listType = typeTuple + case LEFT_SQUARE_BRACKET: + // Parse list + typeList, err := parser.ParseTypeList() + if err != nil { + return nil, err + } + listType = typeList + case LEFT_PAREN: + // Parse quote + typeQuote, err := parser.ParseTypeQuote() + if err != nil { + return nil, err + } + listType = typeQuote + case LITERAL: + // Parse generic + genericType := TypeGeneric{Name: parser.curr.Lexeme} + listType = genericType + parser.NextToken() + default: + return nil, errors.New(fmt.Sprintf("Unexpected token %s while parsing type list", parser.curr.Type)) + } + + err = parser.Match(parser.curr, RIGHT_SQUARE_BRACKET) + if err != nil { + return nil, err + } + + typeList := TypeList{ListType: listType} + return &typeList, nil +} + + func (parser *MShellParser) ParseList() (*MShellParseList, error) { list := &MShellParseList{} err := parser.Match(parser.curr, LEFT_SQUARE_BRACKET) diff --git a/tests/indexing.msh b/tests/indexing.msh index 2ab2845..8d0ac14 100644 --- a/tests/indexing.msh +++ b/tests/indexing.msh @@ -20,7 +20,7 @@ echo "12345" :-4 wl # 1 "12345" -4:-2 wl # 23 -def cjoin ", " join wl end +def cjoin ([str] --) ", " join wl end # Test deletion at index ["a" "b" "c" "d"] 2 del cjoin diff --git a/tests/simple_def.msh b/tests/simple_def.msh index 47f1625..a0dee36 100644 --- a/tests/simple_def.msh +++ b/tests/simple_def.msh @@ -1,8 +1,8 @@ -def square dup + end +def square (int -- int) dup + end [echo 7 square]; [echo "Hello," addWorld]; # All definitions are lifted -def addWorld " World!" + end +def addWorld (str -- str) " World!" + end