This repository has been archived by the owner on May 12, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.mll
86 lines (78 loc) · 2.19 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{
open Parser
open Lexing
exception Error of string

(** [error lexbuf] aborts lexing by raising {!Error}.

    The message contains the text of the current lexeme followed by the
    values of [lexeme_start] and [lexeme_end] for that lexeme, so that the
    caller can point at the offending input.

    @raise Error always. *)
let error lexbuf =
  let first = string_of_int (lexeme_start lexbuf) in
  let last = string_of_int (lexeme_end lexbuf) in
  let message =
    String.concat ""
      [ "Unexpected char: "; lexeme lexbuf; " at "; first; "-"; last ]
  in
  raise (Error message)
(* Keywords are resolved through a lookup table rather than through
   dedicated lexer rules, so that they do not add states to the generated
   automaton. *)

(** [ident id] returns the keyword token bound to [id] in the keyword table,
    or [ID id] when [id] is not one of the listed keywords.

    The table is built once, when [ident] itself is defined; each call only
    performs a lookup. *)
let ident =
  let keywords =
    [
      ("const", CONST);
      ("print", PRINT);
      ("if", IF);
      ("else", ELSE);
      ("while", WHILE);
      ("bool", BOOL);
      ("int", INT);
      ("rat", RAT);
      ("call", CALL);
      ("num", NUM);
      ("denom", DENOM);
      ("true", TRUE);
      ("false", FALSE);
      ("return", RETURN);
    ]
  in
  let table = Hashtbl.create 16 in
  (* Every keyword appears exactly once, so each key gets a single binding. *)
  List.iter (fun (word, tok) -> Hashtbl.replace table word tok) keywords;
  fun id -> try Hashtbl.find table id with Not_found -> ID id
(** [typident tid] wraps the lexed name [tid] in a [TID] token. The lexer
    calls it for names that start with an uppercase letter. *)
let typident (tid : string) = TID tid
}
(* [token lexbuf] scans [lexbuf] and returns the next token.

   Line breaks, spaces, tabs, carriage returns and "//" line comments are
   skipped. Raises [Error] on any input that matches no rule, and on an
   integer literal that does not fit in a native [int]. *)
rule token = parse
  (* skip line breaks, but still count them for position tracking *)
  | '\n'            { new_line lexbuf; token lexbuf }
  (* skip spaces and tabs; '\r' is skipped too so that files with CRLF line
     endings are accepted instead of failing on the carriage return *)
  | [' ' '\t' '\r'] { token lexbuf }
  (* skip line comments *)
  | "//"[^'\n']*    { token lexbuf }
  (* RAT punctuation and operators *)
  | ";"             { PV }
  | "{"             { AO }
  | "}"             { AF }
  | "("             { PO }
  | ")"             { PF }
  | "="             { EQUAL }
  | "["             { CO }
  | "]"             { CF }
  | "/"             { SLASH }
  | "+"             { PLUS }
  | "*"             { MULT }
  | "<"             { INF }
  (* pointers; the word rules below must stay before the identifier rule,
     because on matches of equal length the first rule listed is selected *)
  | "&"             { AND }
  | "new"           { NEW }
  | "null"          { NULL }
  (* named types *)
  | "typedef"       { TYPEDEF }
  (* records *)
  | "struct"        { STRUCT }
  | "."             { DOT }
  (* integer constants, with an optional leading minus sign *)
  | ("-")?['0'-'9']+ as i
      { match int_of_string_opt i with
        | Some n -> ENTIER n
        | None ->
          (* The digits are well-formed, so the only possible failure is a
             value outside the range of [int]. Report it through the lexer's
             own exception rather than letting a bare [Failure] escape. *)
          raise (Error ("Integer literal out of range: " ^ i ^ " at "
                        ^ string_of_int (lexeme_start lexbuf) ^ "-"
                        ^ string_of_int (lexeme_end lexbuf))) }
  (* identifiers and keywords: start with a lowercase letter *)
  | ['a'-'z'](['A'-'Z''a'-'z''0'-'9']|"-"|"_")* as n
      { ident n }
  (* named-type identifiers: start with an uppercase letter *)
  | ['A'-'Z'](['A'-'Z''a'-'z''0'-'9']|"-"|"_")* as n
      { typident n }
  (* end of input *)
  | eof             { EOF }
  (* anything else is rejected *)
  | _               { error lexbuf }