-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.mll
177 lines (170 loc) · 7.27 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
{
open Lexing

(** Raised by the lexer actions when the input cannot be tokenised.
    The payload is a human-readable message. *)
exception SyntaxError of string

(** Lexer modes.  The entry point at the bottom of this file inspects the
    current mode to decide which rule scans the next token. *)
type state =
  | CODE     (* ordinary statement text, scanned by read *)
  | NEWLINE  (* just after a line break, scanned by read_newline *)
  | COMMENT  (* inside a parenthesised comment, scanned by read_comment *)
  | STR      (* inside a quoted string, scanned by read_string *)
  | PSTRING  (* rest of the line is a poetic string, scanned by read_pstring *)
  | PNUMBER  (* rest of the line is a poetic number, scanned by read_number *)
  | IS       (* right after an is-keyword, scanned by read_is *)

(** Mode used for the next token. *)
let state = ref CODE

(** Set by the If, While and Until keywords and cleared at the end of the
    line; while it is set, an is-keyword does not switch to the IS mode. *)
let is_cond = ref false

(* assume all variables live with global scope *)
(** Names of the variables seen so far, most recent first.  A pronoun
    resolves to the head of this list. *)
let env = ref []

(** [incr_linenum lexbuf] records that a line break has just been consumed:
    the line counter of the current position goes up by one and the
    beginning-of-line offset becomes the current offset. *)
let incr_linenum lexbuf =
  let here = lexbuf.lex_curr_p in
  let next_line = succ here.pos_lnum in
  lexbuf.lex_curr_p <-
    { here with pos_lnum = next_line; pos_bol = here.pos_cnum }
}
(* ---- Numeric literals ---- *)
let digit = ['0'-'9']
(* Optionally signed run of digits. *)
let int = '-'? digit+
let frac = '.' digit*
let exp = ['e' 'E'] ['-' '+']? digit+
(* The sign is accepted here too, matching int; without it a negative
   decimal such as -3.5 was cut into the integer -3 followed by a stray
   period that the main rule rejects. *)
let float = '-'? digit+ frac? exp?

(* ---- Layout ---- *)
let whitespace=[' ' '\t']+
let newline="\r\n"|'\r'|'\n'
(* Not referenced by any rule in this file; kept for compatibility. *)
let blankline="\r\n\r\n"|"\r\r"|"\n\n"

(* ---- Variables ---- *)
(* A pronoun stands for the most recently mentioned variable. *)
let pronoun="it"|"he"|"she"|"him"|"her"
|"they"|"them"|"ze"|"hir"|"zie"|"zir"
|"xe"|"xem"|"ve"|"ver"
let article="a"|"an"|"the"|"my"|"your"|"A"|"An"|"The"|"My"|"Your"
(* Not referenced by any rule in this file; kept for compatibility. *)
let captical_article="A"|"An"|"The"|"My"|"Your"
(* One capitalised word.  The spelling of the name is kept as is because
   the rules below refer to it. *)
let captical=['A'-'Z'] ['A'-'Z' 'a'-'z']*
(* Common variable: an article, blanks, then one lower-case word. *)
let id=(article whitespace ['a'-'z']+)

(* ---- Literal keywords ---- *)
let bool_true="true"|"right"|"yes"|"ok"
let bool_false="false"|"wrong"|"no"|"lies"
let null="nothing"|"nowhere"|"nobody"|"empty"|"gone"

(* ---- Comparison phrases, all introduced by an is-keyword ---- *)
let is="is"|"was"|"were"
let gt=is whitespace ("higher"|"greater"|"bigger"|"stronger") whitespace "than"
let lt=is whitespace ("lower"|"less"|"smaller"|"weaker") whitespace "than"
let ge=is whitespace "as" whitespace ("high"|"great"|"big"|"strong") whitespace "as"
let le=is whitespace "as" whitespace ("low"|"little"|"small"|"weak") whitespace "as"
(* Main rule: scans ordinary statement text (mode CODE) and returns one
   parser token per call.  Several actions switch the global mode so that
   the next call is served by a specialised rule.
   Raises SyntaxError on a character no rule accepts, and on a pronoun
   that is used before any variable has been mentioned. *)
rule read=
parse
(* Layout, comments and quoted strings. *)
| '(' { state:=COMMENT; read_comment lexbuf }
| whitespace { read lexbuf }
| newline { is_cond:=false; incr_linenum lexbuf; state:=NEWLINE; read_newline lexbuf }
| '"' { state:=STR; read_string (Buffer.create 16) lexbuf }
(* Keywords. *)
| "mysterious" { Parser.UNDEFINED }
(* The rest of the line after this keyword is scanned as a poetic string. *)
| "says " { state:=PSTRING; Parser.IS }
| "Put" { Parser.PUT }
| "into" { Parser.INTO }
| "Build" { Parser.BUILD }
| "up" { Parser.UP }
| "Knock" { Parser.KNOCK }
| "down" { Parser.DOWN }
| "plus"|"with" { Parser.PLUS }
| "minus"|"without" { Parser.MINUS }
| "times"|"of" { Parser.TIMES }
| "over" { Parser.DIVIDE }
| "Listen" { Parser.LISTEN }
| "to" { Parser.TO }
| "Say"|"Shout"|"Whisper"|"Scream" { Parser.PRINT }
(* Condition keywords raise is_cond so that a following is-keyword is
   treated as a comparison and does not start a poetic literal. *)
| "If" { is_cond:=true; Parser.IF }
| "Else" { Parser.ELSE }
| "While" { is_cond:=true; Parser.WHILE }
| "Until" { is_cond:=true; Parser.UNTIL }
| "Break"|"Break" whitespace "it" whitespace "down" { Parser.BREAK }
| "Continue"|"Take" whitespace "it" whitespace "to" whitespace "the" whitespace "top" { Parser.CONTINUE }
| "takes" { Parser.TAKE }
| "Give" whitespace "back" { Parser.RETURN }
| "and" { Parser.AND }
| "or" { Parser.OR }
| "taking" { Parser.TAKING }
| ',' { Parser.COMMA }
(* Literals. *)
| null { Parser.NULL }
| int { Parser.NUM (float_of_string (lexeme lexbuf)) }
| float { Parser.NUM (float_of_string (lexeme lexbuf)) }
| bool_true { Parser.TRUE }
| bool_false { Parser.FALSE }
(* Comparisons; being longer matches they win over the bare is-keyword. *)
| gt { Parser.GT }
| lt { Parser.LT }
| ge { Parser.GE }
| le { Parser.LE }
(* Outside a condition an is-keyword introduces a poetic literal, so the
   next token is scanned by read_is. *)
| ((is whitespace "not")|"aint") whitespace { (if not !is_cond then state:=IS); Parser.ISNOT }
| is whitespace { (if not !is_cond then state:=IS); Parser.IS }
(* Variables. *)
| pronoun
    { (* A pronoun denotes the most recently mentioned variable. *)
      match !env with
      | name :: _ -> Parser.VARIABLE name
      | [] -> raise (SyntaxError ((lexeme lexbuf)^" refers to nothing")) }
| id
    { (* Common variables are case-insensitive.  The lexeme only contains
         ASCII letters and blanks, so the ASCII lower-casing function gives
         the same result as the removed String.lowercase. *)
      let name = String.lowercase_ascii (lexeme lexbuf) in
      env := name :: !env;
      Parser.VARIABLE name }
| captical
    { (* First word of a proper variable; read_var collects the rest. *)
      let buf = Buffer.create 32 in
      Buffer.add_string buf (lexeme lexbuf);
      read_var buf lexbuf }
| _ { raise (SyntaxError ("Unexpected character:"^(lexeme lexbuf))) }
| eof { state:=CODE; Parser.EOF }
(* Collects the remaining capitalised words of a proper variable.
   buf already holds the first word; further words are appended separated
   by one space.  The finished name is pushed on env and returned as a
   VARIABLE token.

   Fix: when a capitalised keyword followed the name, the keyword token
   used to be returned and the buffered name was silently dropped.  The
   keyword is now handed back to the input, the variable is emitted, and
   the main rule scans the keyword on the next call (including its
   is_cond side effect). *)
and read_var buf=
parse
| newline
    { (* End of line ends the name; next token is scanned in NEWLINE mode. *)
      is_cond := false;
      incr_linenum lexbuf;
      state := NEWLINE;
      let name = Buffer.contents buf in
      env := name :: !env;
      Parser.VARIABLE name }
| ( "Put" | "Build" | "Knock" | "Listen"
  | "Say" | "Shout" | "Whisper" | "Scream"
  | "If" | "Else" | "While" | "Until"
  | "Break" | "Break" whitespace "it" whitespace "down"
  | "Continue" | "Take" whitespace "it" whitespace "to" whitespace "the" whitespace "top"
  | "Give" whitespace "back" )
    { (* Un-read the keyword: rewind the cursor and the tracked position to
         where this match started.  Every keyword listed here is also a
         keyword of the main rule, which will scan it next. *)
      lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_start_pos;
      lexbuf.Lexing.lex_curr_p <- lexbuf.Lexing.lex_start_p;
      let name = Buffer.contents buf in
      env := name :: !env;
      Parser.VARIABLE name }
| whitespace { read_var buf lexbuf }
| captical
    { (* Another capitalised word belongs to the same name. *)
      Buffer.add_char buf ' ';
      Buffer.add_string buf (lexeme lexbuf);
      read_var buf lexbuf }
| ""
    { (* Anything else ends the name without consuming input. *)
      let name = Buffer.contents buf in
      env := name :: !env;
      Parser.VARIABLE name }
(* Scans the token that follows an is-keyword outside a condition (mode IS).
   A literal keyword or a numeric literal is returned directly and the mode
   goes back to CODE; anything else starts a poetic number, which is read
   to the end of the line by read_number.
   Raises SyntaxError when the input ends right after the is-keyword.

   Fixes:
   - the literal keywords are now the same alias sets that the main rule
     uses (bool_true, bool_false, null) instead of a subset of them;
   - a keyword is only recognised as a whole word.  The word rule below
     matches at least as much as any keyword, so a longer word that merely
     starts with a keyword is counted as a poetic-number word, while an
     exact keyword still wins because its rule comes first. *)
and read_is=
parse
| bool_true { state:=CODE; Parser.TRUE }
| bool_false { state:=CODE; Parser.FALSE }
| null { state:=CODE; Parser.NULL }
| float { state:=CODE; Parser.NUM (float_of_string (lexeme lexbuf)) }
| int { state:=CODE; Parser.NUM (float_of_string (lexeme lexbuf)) }
| ['A'-'Z' 'a'-'z']+
    { (* First word of a poetic number: start counting with its length,
         which is what letter-by-letter scanning would have reached. *)
      state := PNUMBER;
      read_number (String.length (lexeme lexbuf)) false (Buffer.create 16) lexbuf }
| "" { state:=PNUMBER; read_number 0 false (Buffer.create 16) lexbuf }
| eof { state:=CODE; raise (SyntaxError ("Is literal is not terminated")) }
(* Scans a poetic number up to the end of the line.
   Each word contributes one digit, its letter count modulo 10; the first
   period becomes the decimal point and every other non-letter is ignored.
     count  - letters seen so far in the current word
     period - whether the decimal point has already been written
     buf    - digits collected so far
   Returns a NUM token holding the value of the collected digits.
   Raises SyntaxError when the line ends without any digit collected.

   Fixes:
   - at end of input the collected number is returned instead of being
     discarded in favour of EOF (the main rule reports EOF on the next
     call);
   - the first period no longer appends a spurious zero digit when no word
     is pending and digits are already present;
   - an empty literal raises SyntaxError instead of the Failure coming out
     of float_of_string. *)
and read_number count period buf=
parse
| newline
    { (* Flush the word in progress, then finish the literal. *)
      if count > 0 then Buffer.add_string buf (string_of_int (count mod 10));
      incr_linenum lexbuf;
      state := NEWLINE;
      if Buffer.length buf = 0 then
        raise (SyntaxError ("Poetic number literal is empty"))
      else
        Parser.NUM (float_of_string (Buffer.contents buf)) }
| whitespace
    { (* A blank ends the current word, if there is one. *)
      if count > 0 then Buffer.add_string buf (string_of_int (count mod 10));
      read_number 0 period buf lexbuf }
| '.'
    { if period then
        (* Only the first period counts; later ones are ignored. *)
        read_number count period buf lexbuf
      else begin
        if count > 0 then
          Buffer.add_string buf (string_of_int (count mod 10))
        else if Buffer.length buf = 0 then
          (* Leading period: keep a zero integer part, as before. *)
          Buffer.add_char buf '0';
        Buffer.add_char buf '.';
        read_number 0 true buf lexbuf
      end }
| ['A'-'Z' 'a'-'z'] { read_number (count+1) period buf lexbuf }
| _ { read_number count period buf lexbuf }
| eof
    { (* Last line without a trailing line break. *)
      if count > 0 then Buffer.add_string buf (string_of_int (count mod 10));
      state := CODE;
      if Buffer.length buf = 0 then Parser.EOF
      else Parser.NUM (float_of_string (Buffer.contents buf)) }
(* Scans the input right after a line break (mode NEWLINE).
   Another line break straight away means an empty line and yields
   BLANKLINE, leaving the mode unchanged; otherwise the mode returns to
   CODE and the main rule takes over without consuming anything. *)
and read_newline=
parse
| newline
    { let () = incr_linenum lexbuf in
      Parser.BLANKLINE }
| ""
    { let () = state := CODE in
      read lexbuf }
| eof
    { Parser.EOF }
(* Skips a parenthesised comment (mode COMMENT).
   The closing parenthesis switches back to CODE and resumes the main rule;
   line breaks inside the comment still advance the line counter.
   Raises SyntaxError when the input ends before the comment is closed. *)
and read_comment=
parse
| ')'
    { let () = state := CODE in
      read lexbuf }
| newline
    { let () = incr_linenum lexbuf in
      read_comment lexbuf }
(* Any other single character; the two rules above take precedence for the
   closing parenthesis and for line breaks. *)
| _
    { read_comment lexbuf }
| eof
    { raise (SyntaxError ("Comment not terminated")) }
(* Scans a poetic string: everything up to the end of the line is the
   value.  buf accumulates the text; the result is a STRING token.
   Raises SyntaxError when the input ends before a line break.

   Fixes:
   - the terminating line break now advances the line counter;
   - every line ending accepted elsewhere in this file is accepted here as
     well (a lone carriage return used to match no rule at all);
   - the mode becomes NEWLINE, as after every other end of line, so that an
     empty line following the string is reported as BLANKLINE;
   - is_cond is cleared, as the other end-of-line actions do. *)
and read_pstring buf=
parse
| newline
    { is_cond := false;
      incr_linenum lexbuf;
      state := NEWLINE;
      Parser.STRING (Buffer.contents buf) }
| [^ '\n' '\r']+ { Buffer.add_string buf (lexeme lexbuf); read_pstring buf lexbuf }
| eof { raise (SyntaxError ("Poetic String is not terminated")) }
(* Scans the body of a quoted string (mode STR); the opening quote has
   already been consumed.  buf accumulates the decoded text; the closing
   quote switches back to CODE and yields a STRING token.
   Raises SyntaxError on an unknown escape sequence and when the input ends
   before the closing quote.

   Fixes:
   - a backslash followed by a double quote is now an escaped quote; it was
     the one common escape missing from the list, so a quote could not be
     written inside a string;
   - line breaks inside a string advance the line counter, so positions
     after a multi-line string stay correct.  The text of the token is
     unchanged. *)
and read_string buf=
parse
| '"' { state:=CODE; Parser.STRING (Buffer.contents buf) }
| '\\' '"' { Buffer.add_char buf '"'; read_string buf lexbuf }
| '\\' '/' { Buffer.add_char buf '/'; read_string buf lexbuf }
| '\\' '\\' { Buffer.add_char buf '\\'; read_string buf lexbuf }
| '\\' 'b' { Buffer.add_char buf '\b'; read_string buf lexbuf }
| '\\' 'f' { Buffer.add_char buf '\012'; read_string buf lexbuf }
| '\\' 'n' { Buffer.add_char buf '\n'; read_string buf lexbuf }
| '\\' 'r' { Buffer.add_char buf '\r'; read_string buf lexbuf }
| '\\' 't' { Buffer.add_char buf '\t'; read_string buf lexbuf }
| newline
    { (* Keep the line break in the value, but count the line. *)
      incr_linenum lexbuf;
      Buffer.add_string buf (Lexing.lexeme lexbuf);
      read_string buf lexbuf }
| [^ '"' '\\' '\r' '\n']+ { Buffer.add_string buf (Lexing.lexeme lexbuf); read_string buf lexbuf }
| _ { raise (SyntaxError ("Illegal string character: " ^ Lexing.lexeme lexbuf)) }
| eof { raise (SyntaxError ("String is not terminated")) }
{
(** [lex lexbuf] is the entry point used to obtain the next token.
    It picks the rule that matches the current lexer mode and runs it on
    [lexbuf].  Rules that take an accumulator are started with a fresh
    buffer, and the poetic-number rule with a zero letter count and no
    decimal point seen. *)
let lex lexbuf =
  let scan =
    match !state with
    | CODE -> read
    | IS -> read_is
    | NEWLINE -> read_newline
    (* will not hit? *)
    | COMMENT -> read_comment
    | PNUMBER -> read_number 0 false (Buffer.create 16)
    | PSTRING -> read_pstring (Buffer.create 16)
    | STR -> read_string (Buffer.create 16)
  in
  scan lexbuf
}