From 8fd123caa22f5230de27b781ff56efc29ffe7987 Mon Sep 17 00:00:00 2001 From: Vipul-Cariappa Date: Sun, 31 Dec 2023 10:25:16 +0530 Subject: [PATCH 1/3] slightly better error msg. Line/Column num is displayed --- .gitignore | 1 + src/common.h | 7 ++++- src/lexer.l | 78 ++++++++++++++++++++++++++++------------------------ src/main.c | 9 ++++-- src/parser.y | 12 +++++--- 5 files changed, 64 insertions(+), 43 deletions(-) diff --git a/.gitignore b/.gitignore index 77fd135..9906c53 100644 --- a/.gitignore +++ b/.gitignore @@ -82,3 +82,4 @@ _deps *.yy.c *tmp* +KariLang diff --git a/src/common.h b/src/common.h index 73de82b..649ab78 100644 --- a/src/common.h +++ b/src/common.h @@ -1,5 +1,7 @@ #pragma once +#define YYERROR_VERBOSE 1 + #include "DS.h" #include #include @@ -8,10 +10,13 @@ extern FILE *yyin; extern int yylex(void); extern int yyparse(void); -extern int yywrap(void); extern void yyerror(char const *s); extern int yylineno; +extern int column; extern char *yytext; +extern char *filename; + +extern char syntax_error_msg[]; typedef enum { UNDEFINED, diff --git a/src/lexer.l b/src/lexer.l index 5205212..da050f4 100644 --- a/src/lexer.l +++ b/src/lexer.l @@ -2,43 +2,49 @@ #include "parser.tab.h" #include #include + + static int next_column = 1; + int column = 1; + + #define HANDLE_COLUMN \ + column = next_column; \ + next_column += strlen(yytext) %} +%option noyywrap noinput nounput yylineno + %% -"valdef" { return KW_VALDEF; } -"funcdef" { return KW_FUNCDEF; } -"bool" { return KW_BOOL; } -"int" { return KW_INT; } -"true" { return KW_TRUE; } -"false" { return KW_FALSE; } -"if" { return KW_IF; } -"then" { return KW_THEN; } -"else" { return KW_ELSE; } -"," { return COMMA; } -"+" { return PLUS; } -"-" { return MINUS; } -"*" { return MULTIPLY; } -"/" { return DIVIDE; } -"%" { return MODULO; } -"&&" { return AND; } -"||" { return OR; } -"!" { return NOT; } -"==" { return EQUALS; } -"!=" { return NOT_EQUALS; } -">" { return GREATER; } -"<" { return LESSER; } -">=" { return GREATER_EQUALS; } -"<=" { return LESSER_EQUALS; } -"->" { return RETURN; } -"(" { return OPEN_BRACKETS; } -")" { return CLOSE_BRACKETS; } -"=" { return ASSIGN; } -[0-9]* { yylval.integer = atoi(yytext); return INTEGER; } -[a-zA-Z_][0-9a-zA-Z_]* { yylval.identifier = strdup(yytext); return IDENTIFIER; } -[ \n\t] { ; } -. { fprintf(stderr, "Unexpected Token\n"); return YYUNDEF; } +"valdef" { HANDLE_COLUMN; return KW_VALDEF; } +"funcdef" { HANDLE_COLUMN; return KW_FUNCDEF; } +"bool" { HANDLE_COLUMN; return KW_BOOL; } +"int" { HANDLE_COLUMN; return KW_INT; } +"true" { HANDLE_COLUMN; return KW_TRUE; } +"false" { HANDLE_COLUMN; return KW_FALSE; } +"if" { HANDLE_COLUMN; return KW_IF; } +"then" { HANDLE_COLUMN; return KW_THEN; } +"else" { HANDLE_COLUMN; return KW_ELSE; } +"," { HANDLE_COLUMN; return COMMA; } +"+" { HANDLE_COLUMN; return PLUS; } +"-" { HANDLE_COLUMN; return MINUS; } +"*" { HANDLE_COLUMN; return MULTIPLY; } +"/" { HANDLE_COLUMN; return DIVIDE; } +"%" { HANDLE_COLUMN; return MODULO; } +"&&" { HANDLE_COLUMN; return AND; } +"||" { HANDLE_COLUMN; return OR; } +"!" { HANDLE_COLUMN; return NOT; } +"==" { HANDLE_COLUMN; return EQUALS; } +"!=" { HANDLE_COLUMN; return NOT_EQUALS; } +">" { HANDLE_COLUMN; return GREATER; } +"<" { HANDLE_COLUMN; return LESSER; } +">=" { HANDLE_COLUMN; return GREATER_EQUALS; } +"<=" { HANDLE_COLUMN; return LESSER_EQUALS; } +"->" { HANDLE_COLUMN; return RETURN; } +"(" { HANDLE_COLUMN; return OPEN_BRACKETS; } +")" { HANDLE_COLUMN; return CLOSE_BRACKETS; } +"=" { HANDLE_COLUMN; return ASSIGN; } +[0-9]* { HANDLE_COLUMN; yylval.integer = atoi(yytext); return INTEGER; } +[a-zA-Z_][0-9a-zA-Z_]* { HANDLE_COLUMN; yylval.identifier = strdup(yytext); return IDENTIFIER; } +[ \t]+ { HANDLE_COLUMN; } +[\n] { HANDLE_COLUMN; next_column = 1; } +. { HANDLE_COLUMN; return YYUNDEF; } %% - -int yywrap(void) { - return 1; -} diff --git a/src/main.c b/src/main.c index 0e816e9..f3772b9 100644 --- a/src/main.c +++ b/src/main.c @@ -5,6 +5,7 @@ IMPLEMENT_HASH_FUNCTION; DS_TABLE_DEF(ast, AST, NULL); ast_table_t *ast; +char *filename; int main(int argc, char *argv[]) { if (argc != 3) { @@ -12,7 +13,7 @@ int main(int argc, char *argv[]) { return 1; } - char *filename = argv[1]; + filename = argv[1]; FILE *file = fopen(filename, "r"); if (file == NULL) { @@ -31,7 +32,11 @@ int main(int argc, char *argv[]) { ast = ast_table_new(100); - yyparse(); + if (yyparse()) { + fclose(file); + fprintf(stderr, "%s", syntax_error_msg); + return 1; + } /* END */ fclose(file); diff --git a/src/parser.y b/src/parser.y index cf029a3..d64d86f 100644 --- a/src/parser.y +++ b/src/parser.y @@ -1,5 +1,8 @@ %{ #include "common.h" + + #define ERROR_MSG_LEN 500 + char syntax_error_msg[ERROR_MSG_LEN]; %} %union { @@ -58,8 +61,8 @@ %% input: %empty - | input value_definition { assert(ast_table_insert(ast, ($2)->name, (AST){.type = AST_VARIABLE, .value.var = $2})); } - | input function_definition { assert(ast_table_insert(ast, ($2)->funcname, (AST){.type = AST_FUNCTION, .value.func = $2})); }; + | input value_definition { (ast_table_insert(ast, ($2)->name, (AST){.type = AST_VARIABLE, .value.var = $2})); } + | input function_definition { (ast_table_insert(ast, ($2)->funcname, (AST){.type = AST_FUNCTION, .value.func = $2})); }; function_definition: KW_FUNCDEF IDENTIFIER function_definition_arguments RETURN KW_BOOL ASSIGN expression { $$ = set_function_return_value(set_function_name($3, $2), BOOL, $7); } | KW_FUNCDEF IDENTIFIER function_definition_arguments RETURN KW_INT ASSIGN expression { $$ = set_function_return_value(set_function_name($3, $2), INT, $7);}; @@ -102,6 +105,7 @@ function_call_arguments: expression { $$ = add_function_call_argument_expression | expression COMMA function_call_arguments { $$ = add_function_call_argument_expression($3, $1); }; %% -void yyerror(char const *s) { - fprintf(stderr, "PARSER ERROR: %s\n", s); +void yyerror(char const *str) { + snprintf(syntax_error_msg, ERROR_MSG_LEN, + "ERROR: %s in %s:%d:%d\n", str, filename, yylineno, column); } From 0dc106164bc40cd2e72d96e839108787424fa131 Mon Sep 17 00:00:00 2001 From: Vipul-Cariappa Date: Sun, 31 Dec 2023 10:38:47 +0530 Subject: [PATCH 2/3] out of order evaluation of valdef(s) --- src/interpreter.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/interpreter.c b/src/interpreter.c index fb9d54a..0eaab88 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -52,12 +52,21 @@ bool interpret(int input, int *output) { while (NULL != (tree = ast_table_iter_next(ast, &key))) { switch (tree->type) { case AST_VARIABLE: + if (integer_table_get_ptr(globalIntegers, tree->value.var->name)) { + break; + } + errno = 0; if (tree->value.var->type == INT) { assert(integer_table_insert( globalIntegers, tree->value.var->name, evaluate_expression(tree->value.var->expression, NULL) .integer)); + break; + } + if (boolean_table_get_ptr(globalBooleans, tree->value.var->name)) { + break; } + errno = 0; if (tree->value.var->type == BOOL) { assert(boolean_table_insert( globalBooleans, tree->value.var->name, @@ -119,11 +128,28 @@ ExpressionResult evaluate_expression(Expression *exp, Context *cxt) { if (v) { return (ExpressionResult){.integer = *(int *)v}; } + errno = 0; v = boolean_table_get_ptr(globalBooleans, exp->value.variable); if (v) { return (ExpressionResult){.boolean = *(bool *)v}; } errno = 0; + AST *tree = ast_table_get_ptr(ast, exp->value.variable); + if (tree) { + ExpressionResult result_exp = + evaluate_expression(tree->value.var->expression, NULL); + if (tree->value.var->type == INT) { + assert(integer_table_insert( + globalIntegers, tree->value.var->name, result_exp.integer)); + return result_exp; + } + if (tree->value.var->type == BOOL) { + assert(boolean_table_insert( + globalBooleans, tree->value.var->name, result_exp.boolean)); + return result_exp; + } + } + goto error; } case PLUS_EXPRESSION: From fe69f3a03a2ae23cc54177ef83359a2709c3addb Mon Sep 17 00:00:00 2001 From: Vipul-Cariappa Date: Sun, 31 Dec 2023 10:49:50 +0530 Subject: [PATCH 3/3] update the grammar/syntax --- README.md | 22 ++++++++++++++++------ src/lexer.l | 2 ++ src/parser.y | 22 ++++++++++------------ 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 61e810a..23ad267 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,25 @@ Toy Functional Programming Language ## Example Program ```text -valdef int x = 10 +valdef zero: int = 0; +valdef one: int = 1; +valdef two: int = one + one; -funcdef fib n int -> int = - if n < 2 then +funcdef sum(n: int) -> int = _sum(zero, n); + +funcdef _sum(c: int, n: int) -> int = + if n == zero then + c + else + _sum(c + n, n + -1); + +funcdef fib(n: int) -> int = + if n < two then n - else - fib(n + -1) + fib(n + -2) + else + fib(n + -1) + fib(n + -two); -funcdef main n int -> int = fib(n) +funcdef main(n: int) -> int = fib(n); ``` To Run diff --git a/src/lexer.l b/src/lexer.l index da050f4..bf70a77 100644 --- a/src/lexer.l +++ b/src/lexer.l @@ -23,6 +23,8 @@ "if" { HANDLE_COLUMN; return KW_IF; } "then" { HANDLE_COLUMN; return KW_THEN; } "else" { HANDLE_COLUMN; return KW_ELSE; } +";" { HANDLE_COLUMN; return STATEMENT_END; } +":" { HANDLE_COLUMN; return TYPE_OF; } "," { HANDLE_COLUMN; return COMMA; } "+" { HANDLE_COLUMN; return PLUS; } "-" { HANDLE_COLUMN; return MINUS; } diff --git a/src/parser.y b/src/parser.y index d64d86f..731548f 100644 --- a/src/parser.y +++ b/src/parser.y @@ -23,6 +23,8 @@ %token KW_THEN %token KW_ELSE %token COMMA +%token STATEMENT_END +%token TYPE_OF %token ASSIGN %token PLUS %token MINUS @@ -64,20 +66,16 @@ input: %empty | input value_definition { (ast_table_insert(ast, ($2)->name, (AST){.type = AST_VARIABLE, .value.var = $2})); } | input function_definition { (ast_table_insert(ast, ($2)->funcname, (AST){.type = AST_FUNCTION, .value.func = $2})); }; -function_definition: KW_FUNCDEF IDENTIFIER function_definition_arguments RETURN KW_BOOL ASSIGN expression { $$ = set_function_return_value(set_function_name($3, $2), BOOL, $7); } - | KW_FUNCDEF IDENTIFIER function_definition_arguments RETURN KW_INT ASSIGN expression { $$ = set_function_return_value(set_function_name($3, $2), INT, $7);}; +function_definition: KW_FUNCDEF IDENTIFIER OPEN_BRACKETS function_definition_arguments CLOSE_BRACKETS RETURN KW_BOOL ASSIGN expression STATEMENT_END { $$ = set_function_return_value(set_function_name($4, $2), BOOL, $9); } + | KW_FUNCDEF IDENTIFIER OPEN_BRACKETS function_definition_arguments CLOSE_BRACKETS RETURN KW_INT ASSIGN expression STATEMENT_END { $$ = set_function_return_value(set_function_name($4, $2), INT, $9);}; -function_definition_arguments: IDENTIFIER KW_BOOL { $$ = add_function_argument(make_function(), $1, BOOL); } - | OPEN_BRACKETS IDENTIFIER KW_BOOL CLOSE_BRACKETS { $$ = add_function_argument(make_function(), $2, BOOL); } - | IDENTIFIER KW_INT { $$ = add_function_argument(make_function(), $1, INT); } - | OPEN_BRACKETS IDENTIFIER KW_INT CLOSE_BRACKETS { $$ = add_function_argument(make_function(), $2, INT); } - | IDENTIFIER KW_BOOL function_definition_arguments { $$ = add_function_argument($3, $1, BOOL); } - | OPEN_BRACKETS IDENTIFIER KW_BOOL CLOSE_BRACKETS function_definition_arguments { $$ = add_function_argument($5, $2, BOOL); } - | IDENTIFIER KW_INT function_definition_arguments { $$ = add_function_argument($3, $1, INT); } - | OPEN_BRACKETS IDENTIFIER KW_INT CLOSE_BRACKETS function_definition_arguments { $$ = add_function_argument($5, $2, INT); }; +function_definition_arguments: IDENTIFIER TYPE_OF KW_BOOL { $$ = add_function_argument(make_function(), $1, BOOL); } + | IDENTIFIER TYPE_OF KW_INT { $$ = add_function_argument(make_function(), $1, INT); } + | IDENTIFIER TYPE_OF KW_BOOL COMMA function_definition_arguments { $$ = add_function_argument($5, $1, BOOL); } + | IDENTIFIER TYPE_OF KW_INT COMMA function_definition_arguments { $$ = add_function_argument($5, $1, INT); } -value_definition: KW_VALDEF KW_BOOL IDENTIFIER ASSIGN expression { $$ = make_variable($3, BOOL, $5); } - | KW_VALDEF KW_INT IDENTIFIER ASSIGN expression { $$ = make_variable($3, INT, $5); }; +value_definition: KW_VALDEF IDENTIFIER TYPE_OF KW_BOOL ASSIGN expression STATEMENT_END { $$ = make_variable($2, BOOL, $6); } + | KW_VALDEF IDENTIFIER TYPE_OF KW_INT ASSIGN expression STATEMENT_END { $$ = make_variable($2, INT, $6); }; expression: IDENTIFIER { $$ = make_variable_expression($1); } | INTEGER { $$ = make_integer_expression($1); }