12011619 Liquan Wang
12111744 Wenhui Tao
12111611 Ruixiang Jiang
> make splc #compile the splc compiler for SPL language
> make test #compile the compiler and run test cases
> make clean #clean the directory
> make lexer #using lex.l to compile the lexer for debug
In the lex.l
file
-
yylval
returns the<name>\n
of the lexical unit, for example"{" {yylval=strdup("LC\n"); return LC;}
-
We detect tokens in the correct format and tokens in the wrong format.
-
The correct token can help the Bison part to do syntax analysis
-
The wrong token can also help the bison to do error recovery and analyze the entire program
/* For example: invalid decimal and hex INT: starting with 0 */ 0[0-9]+|0[xX]0[0-9a-fA-F]+ {lexical_error(yytext,yylineno); return INT;}
-
In the syntax.y
file
-
The various operators are defined from top to bottom in order of precedence from low to high.
The priority is specified by text
%prec
to eliminate some ambiguity. -
Generate syntax tree:
Instead of defining
struct Node
, we use the functionconcat_shift()
to truncate the next level of string by text\n
and then add two spaces before each partIn each production, we just need to use the
concat_shift()
as below to merge the syntax information in$n
to$$
This method's code implementation is simplier than the traditional
struct Node
methodFunDec: ID LP VarList RP {asprintf(&$$,"FunDec (%d)\n%s\n", @$.first_line, concat_shift($1,$2,$3,$4));}
The function uses Variadic Template and Fold Expressions in C++17, so we should use g++ option
-std=c++17
to compile thesplc
//concat the strings and shift two spaces template<typename... Args> char* concat_shift(Args... strs) { //1.concat the strings char* concat = strdup(""); (asprintf(&concat,"%s%s",concat,strs), ...); //2.shift two spaces in each line char* result = strdup(""); char* line=strtok(concat,"\n"); while(line!=NULL){ asprintf(&result,"%s %s\n",result,line); line=strtok(NULL,"\n"); } return result; }
When the
splc
parse the program recursively, we can get the syntax tree as belowProgram (1) ExtDefList (1) ExtDef (1) Specifier (1) TYPE: int FunDec (1) ID: test_1_o02 LP RP CompSt (2) LC DefList (3) ...
-
In
lex.l
we detect the lexical error, and pass the right token to the parser as if the lexeme is right, so the syntax analysis process can continue without errorFor example, the code below recognizes the invalid decimal and hex
INT
token starting with 00[0-9]+|0[xX]0[0-9a-fA-F]+ {lexical_error(yytext,yylineno); return INT;}
Moreover, we can also recognize the invalid
CHAR
token like below'([^']|\\[xX](0|[1-9a-fA-F][0-9a-fA-F]?))' {asprintf(&yylval,"CHAR: %s\n",yytext); return CHAR;} '[^']*' {lexical_error(yytext,yylineno); return CHAR;} /*Invalid CHAR: not matched by the above valid CHAR regex will fail to this regex*/
-
In
syntax.y
we detect the syntax error by matching the wrong sequence of terminals and nonterminals as belowExtDef: Specifier ExtDecList SEMI {asprintf(&$$,"ExtDef (%d)\n%s\n", @$.first_line, concat_shift($1,$2,$3));} | Specifier ExtDecList error {syntax_error("semicolon \';\'",@2.last_line);} CompSt: ... /*The two error cases below indicate that declaration must before definition*/ | LC DefList StmtList DefList error {syntax_error("specifier",@3.first_line);} | LC StmtList DefList error {syntax_error("specifier",@2.first_line);}
Beyond the basic requirement, we also implement some extra features
Because of aesthetic issues, the code is partially commented, but the actual program is not commented
To make the code more intuitive, we’ve attached test samples and results that you can skip if you don’t want to read them
/*in lex.l*/
"//".* /*ignore single-line comment*/
"/*"(\*[^/]|[^*])*"*/" /*ignore multi-line comment*/
\"(\\\"|[^\"])*\" {asprintf(&yylval,"STRING: %s\n",yytext); return STRING;}
Enter the <macro>
state after a special beginning is recognized to distinguish it from the exception recognition capture
/*in lex.l*/
"#include" {yylval=strdup("INCLUDE\n"); BEGIN( macro); return INCLUDE;}
"#define" {yylval=strdup("DEFINE\n"); BEGIN(macro); return DEFINE;}
"#ifdef" {yylval=strdup("IFDEF\n"); BEGIN(macro); return IFDEF;}
"#else" {yylval=strdup("MACROELSE\n"); return MACROELSE;}
"#endif" {yylval=strdup("ENDIF\n"); return ENDIF;}
<macro>{
"," {yylval=strdup("COMMA\n"); return COMMA;}
"<" {yylval=strdup("LT\n"); return LT;}
">" {yylval=strdup("GT\n"); return GT;}
\" {yylval=strdup("DQUOT\n"); return DQUOT;}
\n {BEGIN(INITIAL);}
[ \t]+ /* ignore word splits */
[^," "<>"\n]+ {asprintf(&yylval,"MACRO: %s\n",yytext); return MACRO;}
}
Macro and file introduction automatically changes to the beginning of the file, and parallel output, to achieve #define
#ifdef
#else
#endif
#include
In the SPL file, we can define multiple macros on a single line separated by commas
/*in syntax.y*/
Program:
ExtDefList {if(!has_error){printf("Program (%d)\n%s", @$.first_line, concat_shift($1));}}
| MacroStmt ExtDefList {if(!has_error){printf("Program (%d)\n%s", @$.first_line, concat_shift($1,$2));}}
MacroStmt:
IncludeStmt {asprintf(&$$,"MacroStmt (%d)\n%s\n", @$.first_line, concat_shift($1));}
| DefineStmt {asprintf(&$$,"MacroStmt (%d)\n%s\n", @$.first_line, concat_shift($1));}
| DefineStmt MacroStmt {asprintf(&$$,"MacroStmt (%d)\n%s\n", @$.first_line, concat_shift($1,$2));}
| IncludeStmt MacroStmt {asprintf(&$$,"MacroStmt (%d)\n%s\n", @$.first_line, concat_shift($1,$2));}
IncludeStmt:
INCLUDE LT MACRO GT {asprintf(&$$,"IncludeStmt (%d)\n%s", @$.first_line, concat_shift($1,$2,$3,$4));}
| INCLUDE DQUOT MACRO DQUOT {asprintf(&$$,"IncludeStmt (%d)\n%s", @$.first_line, concat_shift($1,$2,$3,$4));}
DefineStmt:
DEFINE MACRO MACRO {asprintf(&$$,"DefineStmt (%d)\n%s", @$.first_line, concat_shift($1,$2,$3));}
| DefineStmt COMMA MACRO MACRO {asprintf(&$$,"DefineStmt (%d)\n%s", @$.first_line, concat_shift($1,$2,$3));}
Stmt:
...
| IFDEF Stmt ENDIF {asprintf(&$$,"Stmt (%d)\n%s\n", @$.first_line, concat_shift($1,$2,$3));}
| IFDEF MACRO Stmt MACROELSE Stmt ENDIF {asprintf(&$$,"Stmt (%d)\n%s\n", @$.first_line, concat_shift($1,$2,$3,$4,$5,$6));}
/*in lex.l*/
for {yylval=strdup("FOR\n"); return FOR;}
In the parser, the logic of for
is very similar to if
and while
/*in syntax.y*/
Stmt:
...
| FOR LP Exp SEMI Exp SEMI Exp RP Stmt {
asprintf(&$$,"Stmt (%d)\n%s\n", @$.first_line, concat_shift($1,$2,$3,$4,$5,$6,$7,$8,$9));
}