/
lexer.mll
118 lines (113 loc) · 3.45 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
(** Lexer *)
{
open Parser
open Lexing
(* [incr_linenum lexbuf] records that a newline has just been consumed:
   the line counter of the current position is bumped by one and the
   current character offset becomes the beginning-of-line offset.

   This is exactly the update performed by the standard [Lexing.new_line],
   so we delegate to it instead of rebuilding the position record by hand.
   NOTE(review): [Lexing.new_line] is a no-op on buffers created with
   position tracking disabled; confirm no caller relies on positions for
   such buffers. *)
let incr_linenum lexbuf = Lexing.new_line lexbuf
(* [error lexbuf msg] reports a lexical error and aborts lexing.

   It writes [msg] followed by the offset of the current position within
   its line (current offset minus beginning-of-line offset) to standard
   output, flushes, and never returns.

   @raise Parsing.Parse_error always. *)
let error lexbuf msg =
  let { Lexing.pos_cnum; pos_bol; _ } = lexbuf.Lexing.lex_curr_p in
  let column = pos_cnum - pos_bol in
  print_string msg;
  Printf.printf " at position %i.\n%!" column;
  raise Parsing.Parse_error
}
(* Horizontal whitespace only: a run of spaces and tabs. *)
let white = [' ' '\t']+

(* A single decimal digit. *)
let num = ['0'-'9']

(* A letter.  NOTE(review): the backslash is accepted here as well, so it
   may appear inside identifiers after their first character; confirm that
   this is intended. *)
let alpha = ['a'-'z'] | ['A'-'Z'] | '\\'

(* A natural number literal: one or more digits.  This used to be [num*],
   which also accepts the empty string; a numeric literal must contain at
   least one digit. *)
let nat = num+

(* Identifiers start with a lowercase letter, constructors with an
   uppercase one; both continue with letters, digits, underscores and
   primes. *)
let ident = ['a'-'z'] (alpha | num | '_' | ''' )*
let constr = ['A'-'Z'] (alpha | num | '_' | ''' )*
(* [main lexbuf] is the entry point of the lexer: it returns the next token
   of the input.

   - Newlines and horizontal whitespace are skipped; newlines also update
     the line bookkeeping through [incr_linenum].
   - Block comments opened by slash-star are skipped by [comments].
   - String literals are read by [str] into a fresh buffer.
   - Keyword rules are listed before [ident]: ocamllex prefers the longest
     match and, among matches of equal length, the rule that appears first,
     so a word that is exactly a keyword yields the keyword token while a
     longer word yields IDENT.
   - Any other character is reported through [error], which raises
     [Parsing.Parse_error]. *)
rule main = parse
  (* Layout *)
  | '\n'          { incr_linenum lexbuf; main lexbuf }
  | white         { main lexbuf }

  (* Brackets *)
  | '('           { LPAREN }
  | ')'           { RPAREN }
  | '{'           { LBRACE }
  | '}'           { RBRACE }
  | '['           { LBRACKET }
  | ']'           { RBRACKET }
  | '<'           { LANGLE }
  | '>'           { RANGLE }

  (* Function introducers *)
  | "fn"          { FN }
  | "λ"           { LAMBDA }
  | '\\'          { LAMBDA }

  (* Operators and punctuation *)
  | '+'           { PLUS }
  | '-'           { MINUS }
  | '*'           { TIMES }
  | ','           { COMMA }
  | '''           { QUOTE }
  | "''"          { DOUBLEQUOTE }
  | "'''"         { TRIPLEQUOTE }
  | ':'           { COLON }
  | ';'           { SEMICOLON }
  | '#'           { SHARP }
  | '='           { EQUALS }

  (* Keywords *)
  | "return"      { RETURN }
  | "type"        { TYPE }
  | "void"        { VOID }
  | "unit"        { UNIT }
  | "box"         { BOX }
  | "array"       { ARRAY }
  | "print"       { PRINT }
  | "intadd"      { INTADD }
  | "intsub"      { INTSUB }
  | "intmul"      { INTMUL }
  | "intdiv"      { INTDIV }
  | "inteq"       { INTEQ }
  | "intshl"      { INTSHL }
  | "intshr"      { INTSHR }
  | "intsar"      { INTSAR }
  | "intand"      { INTAND }
  | "intor"       { INTOR }
  | "intxor"      { INTXOR }
  | "intlt"       { INTLT }
  | "intslt"      { INTSLT }
  | "alloc"       { ALLOC }
  | "free"        { FREE }
  | "load"        { LOAD }
  | "store"       { STORE }
  | "arrayalloc"  { ARRAYALLOC }
  | "arrayfree"   { ARRAYFREE }
  | "arrayget"    { ARRAYGET }
  | "push"        { PUSH }
  | "pop"         { POP }
  | "call"        { CALL }
  | "encode"      { ENCODE }
  | "decode"      { DECODE }
  | "if"          { IF }
  | "then"        { THEN }
  | "else"        { ELSE }
  | "int"         { NAT }
  | "direct"      { HACK }
  | "copy"        { COPY }
  | "let"         { LET }
  | "val"         { VAL }
  | "as"          { AS }
  | "of"          { OF }
  | "in"          { IN }
  | "case"        { CASE }
  | "->"          { TO }
  | "|"           { VERTBAR }

  (* Literals and names *)
  | nat
      { let digits = Lexing.lexeme lexbuf in
        (* [int_of_string] fails on literals that do not fit in an [int];
           report that like every other lexical error instead of letting a
           bare [Failure] escape from the lexer. *)
        let value =
          (try int_of_string digits
           with Failure _ -> error lexbuf "Integer literal out of range")
        in
        NUM value }
  | ident         { IDENT (Lexing.lexeme lexbuf) }
  | constr        { CONSTR (Lexing.lexeme lexbuf) }

  | eof           { EOF }

  (* Comments and strings are handled by dedicated rules *)
  | "/*"          { comments 0 lexbuf}
  | "\""          { let buf = Buffer.create 1 in STRING (str buf lexbuf)}

  | _             { error lexbuf "Unexpected symbol" }
(* [comments depth lexbuf] skips the rest of a block comment and then
   resumes [main].  Block comments nest: [depth] is the number of enclosing
   comments that remain open once the current one is closed, so the closing
   delimiter seen at depth 0 ends comment mode.  Newlines inside comments
   still go through [incr_linenum].

   Reaching end of input inside a comment prints a message on standard
   output and raises [End_of_file]. *)
and comments depth = parse
  | '\n' { incr_linenum lexbuf; comments depth lexbuf }
  | "/*" { comments (succ depth) lexbuf }
  | "*/" { (match depth with
            | 0 -> main lexbuf
            | _ -> comments (pred depth) lexbuf) }
  | eof  { print_endline "comments are not closed";
           raise End_of_file }
  | _    { comments depth lexbuf }
(* [str buf lexbuf] reads the body of a string literal whose opening double
   quote has already been consumed, accumulating the decoded characters in
   [buf], and returns the contents of [buf] at the closing double quote.

   Two escapes are decoded: backslash-n becomes a newline and
   backslash-double-quote becomes a double quote.  Every other character,
   including a lone backslash, is copied verbatim.

   A raw newline inside the literal is kept in the string and also reported
   to [incr_linenum] so that later positions stay accurate.

   End of input before the closing quote is reported through [error]
   (which raises [Parsing.Parse_error]) rather than falling through to the
   generic empty-token failure of the generated lexer. *)
and str buf = parse
  | '"'     { Buffer.contents buf }
  | "\\n"   { Buffer.add_char buf '\n'; str buf lexbuf }
  | "\\\""  { Buffer.add_char buf '"'; str buf lexbuf }
  | '\n'    { incr_linenum lexbuf;
              Buffer.add_char buf '\n';
              str buf lexbuf }
  | eof     { error lexbuf "Unterminated string literal" }
  | _ as ch { Buffer.add_char buf ch; str buf lexbuf }