/
forth.lex
130 lines (105 loc) · 3.75 KB
/
forth.lex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
%{
/* LEX input for FORTH input file scanner */
/*
Specifications are as follows:
This file must be run through "sed" to change
yylex () {
to
TOKEN *yylex () {
where the sed script is
sed "s/yylex () {/TOKEN *yylex () {/" lex.yy.c
Note that spaces have been included above so these lines won't be
mangled by sed; in actuality, the two blanks surrounding () are
removed.
The function "yylex()" always returns a pointer to a structure:
struct tokenrec {
int type;
char *text;
}
#define TOKEN struct tokenrec
where the type is a hint as to the word's type:
DECIMAL for decimal literal d+
OCTAL for octal literal 0d*
HEX for hex literal 0xd+ or 0Xd+
C_BS for a literal Backspace '\b'
C_FF for a literal Form Feed '\f'
C_NL for a literal Newline '\n'
C_CR for a literal Carriage Return '\r'
C_TAB for a literal Tab '\t'
C_BSLASH for a literal backslash '\\'
C_LIT for any other character literal 'x' where x is possibly '
STRING_LIT for a string literal (possibly containing \")
COMMENT for a left-parenthesis (possibly beginning a comment)
PRIM for "PRIM"
CONST for "CONST"
VAR for "VAR"
USER for "USER"
LABEL for "LABEL"
COLON for ":"
SEMICOLON for ";"
SEMISTAR for ";*" (used to make words IMMEDIATE)
NUL for the token {NUL}, which gets compiled as a null byte;
this special interpretation takes place in the COLON
code.
LIT for the word "LIT" (treated like OTHER, except that
no warning is generated when a literal follows this)
OTHER for any other word not recognized above
Note that this is just a hint: the meaning of any string of characters
depends on the context.
*/
%}
%{
#include "forth_lx.h"

/* The one token record that the token rules fill in and return by address. */
TOKEN token;

/*
 * Input hook for the generated scanner.  Each request reads a single
 * character from standard input with getchar(): at end of file the
 * count is set to YY_NULL, otherwise the character is stored in the
 * first slot of the buffer and the count is set to 1.  max_size is not
 * consulted.
 */
#define YY_INPUT(buf,result,max_size) \
	{ \
	int next_ch = getchar(); \
	if (next_ch == EOF) { \
		(result) = YY_NULL; \
	} else { \
		(buf)[0] = next_ch; \
		(result) = 1; \
	} \
	}

/*
 * Named character classes used by the rules:
 *   white    blank, tab, newline, carriage return, form feed
 *   octal    one octal digit
 *   decimal  one decimal digit
 *   hex      one hexadecimal digit, either case
 */
%}
white   [ \t\n\r\f]
octal   [0-7]
decimal [0-9]
hex     [0-9A-Fa-f]
%%
 /*
  * Token rules.
  *
  * Every token pattern ends in a trailing context of one whitespace
  * character, so a word is matched only when whitespace follows it; the
  * whitespace itself is not part of the token text.  A word that runs
  * directly into end of input is therefore not matched by any rule in
  * this section.
  *
  * Each action stores a type code in the shared token record, points
  * token.text at yytext (the pointer itself, not a copy of the text),
  * and returns the address of the record.
  *
  * The OTHER rule at the bottom accepts any run of non-whitespace
  * characters, so it matches the same text as the more specific rules
  * above it.  Ties in match length go to the rule listed first, which
  * is why OTHER has to stay last.
  *
  * The whitespace rule requires at least one character so that it can
  * never produce a zero-length match.
  */
{white}+ /* whitespace -- keep looping */ ;
-?[1-9]{decimal}*/{white} { token.type = DECIMAL; token.text = yytext;
 return &token; }
-?0{octal}*/{white} { token.type = OCTAL; token.text = yytext;
 return &token; }
-?0[xX]{hex}+/{white} { token.type = HEX; token.text = yytext;
 return &token; }
\'\\b\'/{white} { token.type = C_BS; token.text = yytext; return &token; }
\'\\f\'/{white} { token.type = C_FF; token.text = yytext; return &token; }
\'\\n\'/{white} { token.type = C_NL; token.text = yytext; return &token; }
\'\\r\'/{white} { token.type = C_CR; token.text = yytext; return &token; }
\'\\t\'/{white} { token.type = C_TAB; token.text = yytext; return &token; }
\'\\\\\'/{white} { token.type = C_BSLASH; token.text = yytext; return &token; }
\'.\'/{white} { token.type = C_LIT; token.text = yytext; return &token; }
\"(\\\"|[^"])*\"/{white} { token.type = STRING_LIT; token.text = yytext;
 return &token; }
"("/{white} { token.type = COMMENT; token.text = yytext;
 return &token; }
"PRIM"/{white} { token.type = PRIM; token.text = yytext;
 return &token; }
"CONST"/{white} { token.type = CONST; token.text = yytext;
 return &token; }
"VAR"/{white} { token.type = VAR; token.text = yytext;
 return &token; }
"USER"/{white} { token.type = USER; token.text = yytext;
 return &token; }
"LABEL"/{white} { token.type = LABEL; token.text = yytext;
 return &token; }
":"/{white} { token.type = COLON; token.text = yytext;
 return &token; }
";"/{white} { token.type = SEMICOLON; token.text = yytext;
 return &token; }
";*"/{white} { token.type = SEMISTAR; token.text = yytext;
 return &token; }
"{NUL}"/{white} { token.type = NUL; token.text = yytext;
 return &token; }
"LIT"/{white} { token.type = LIT; token.text = yytext;
 return &token; }
[^ \n\t\r\f]+/{white} { token.type = OTHER; token.text = yytext;
 return &token; }
%%