-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.mll
157 lines (135 loc) · 3.68 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
(**********************************)
(* ACKNOWLEDGMENTS: Much of the regular expression logic was adapted from
the A4 source code - written by Michael Clarkson.
Levenshtein algorithm adapted from the Wikipedia page description.
https://en.wikipedia.org/wiki/Levenshtein_distance*)
(**********************************)
{
open Lexing
open Parser
open Lazy
exception Error of string
exception Autocorrect of string*string
let comment_depth = ref 0
(*******AUTOCORRECT - LEVENSHTEIN AND DICTIONARY********)
(*[minimum a b c] is the minimum of a, b and c*)
let minimum a b c =
min a (min b c)
(*[levenshtein s t] calculates the minimum number of single-character
* transformations (could be an addition, switch or deletion) required to take
* string s to string t.
* Follows the Levenshtein distance algorithm.*)
let rec levenshtein s t =
let rec distance i j =
match i, j with
|i, 0 -> force (lazy i) (*i deletions needed to empty string*)
|0, j -> force (lazy j)
|i, j -> if s.[i-1] = t.[j-1] (*last characters equal*)
then force (lazy (distance (i-1) (j-1))) (*no transformations needed - same as without last characters*)
else let d1, d2, d3 = (distance i (j-1)), (distance (i-1) j), (distance (i-1) (j-1)) in
1 + (min d1 (min d2 d3))
in distance (String.length s) (String.length t)
(*[dictionary] is a list of all the supported functions/inputs*)
let dictionary =
["pi"; "e"; "phi";
"pow"; "sqrt";
"sin"; "cos"; "tan";
"arcsin"; "arccos"; "arctan";
"ln"; "log"]
(*[autocorrect s lst] returns [Some s'] where s' is the
* string in [lst] whose
* Levenshtein distance to [s] is lowest, bounded above
* by 2. Returns None if no such string.*)
let autocorrect s l =
let rec helper s l num word =
match l with
|[] -> word
|h::t -> let lev = levenshtein s h in
if (lev <= 2) && (lev <= num) then helper s t lev (Some h)
else helper s t num word
in helper s l 2 None
}
(******************************************************************)
(* Lexer body *)
(******************************************************************)
let white = [' ' '\t']+
let digit = ['0'-'9']
let int = '-'? digit+
let letter = ['a'-'z' 'A'-'Z']
let word = letter+
let id = ('_' | letter) ('_' | letter | digit)*
let newline = ('\013'* '\010')
let blank = [' ' '\009' '\012']
let lowercase = ['a'-'z']
let identchar = ['A'-'Z' 'a'-'z' '_' '\'' '0'-'9']
let id = (lowercase | '_') identchar*
let whole_number =
['0'-'9'] ['0'-'9' '_']*
let decimal_number =
['0'-'9'] ['.'] ['0'-'9']*
let num_literal =
whole_number | decimal_number
rule token = parse
| blank+
{ token lexbuf }
| ['\n']
{ new_line lexbuf; token lexbuf }
| num_literal
{ CONST (Lexing.lexeme lexbuf) }
| "x"
{ IDENT }
| "+"
{ PLUS }
| "-"
{ MINUS }
| "*"
{ TIMES }
| "/"
{ DIV }
| "("
{ LPAREN }
| ")"
{ RPAREN }
| ","
{ COMMA }
| "^"
{ CARAT }
| "pow"
{ POW }
| "sqrt"
{ SQRT }
| "log"
{ LOG }
| "ln"
{ LN }
| "sin"
{ SIN }
| "cos"
{ COS }
| "tan"
{ TAN }
| "arcsin"
{ ARCSIN }
| "arccos"
{ ARCCOS }
| "arctan"
{ ARCTAN }
| "pi"
{ PI }
| "e"
{ NATEXP }
| "phi"
{ PHI }
| "y"
{ Y }
| "="
{ EQUALS }
| eof
{ EOF }
| word
{ let s = Lexing.lexeme lexbuf in
match autocorrect s dictionary with
|None -> raise (Error (Lexing.lexeme lexbuf))
|Some x -> raise (Autocorrect ((Lexing.lexeme lexbuf), x)) }
| _
{ raise (Error (Lexing.lexeme lexbuf))}