-
Notifications
You must be signed in to change notification settings - Fork 0
/
tokenizer.js
101 lines (78 loc) · 2.07 KB
/
tokenizer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// const latexCode = '/sqrt {356} 3 /frac 1 {/sqrt 3}'
// const latexCode = '-b\\pm \\sqrt {{b^2}-4ac}测试'
// const latexCode = '\\frac{{-b\\pm \\sqrt {{b^2}-4ac}}}{{2a}}'
/**
 * Splits a LaTeX-like formula string into a flat list of tokens.
 *
 * Token types produced:
 *   - 'wrapper'     : '{' or '}'
 *   - 'weakWrapper' : '[' or ']'
 *   - 'block'       : '(' or ')'
 *   - 'operator'    : one of + - = _ ^
 *   - 'formula'     : a backslash followed by zero or more ASCII letters (e.g. \frac)
 *   - 'params'      : a run of word characters (ASCII letters, digits, underscore)
 *   - 'other'       : any other single non-whitespace character
 *
 * Whitespace separates tokens and is never emitted.
 *
 * @param {string} [input=''] - Source text to tokenize.
 * @returns {Array<{type: string, value: string}>} Tokens in source order.
 */
const tokenizer = (input = '') => {
  // None of these patterns use the g/y flags, so they are stateless and can be
  // created once per call instead of once per character.
  const WHITESPACE = /\s/
  const OPERATORS = /[\+\-\=\_\^]/
  const FORMULA = /[a-z]/i
  const PARAMS = /\w/

  const tokens = []
  let current = 0

  while (current < input.length) {
    const char = input[current]

    if (WHITESPACE.test(char)) {
      current++
      continue
    }

    // Braces
    if (char === '{' || char === '}') {
      tokens.push({ type: 'wrapper', value: char })
      current++
      continue
    }

    // Square brackets
    if (char === '[' || char === ']') {
      tokens.push({ type: 'weakWrapper', value: char })
      current++
      continue
    }

    // Parentheses
    if (char === '(' || char === ')') {
      tokens.push({ type: 'block', value: char })
      current++
      continue
    }

    if (OPERATORS.test(char)) {
      tokens.push({ type: 'operator', value: char })
      current++
      continue
    }

    // Backslash command: the backslash plus every ASCII letter that follows it.
    if (char === '\\') {
      let value = char
      current++
      // The bounds check is essential: past the end of the string,
      // input[current] is undefined, which RegExp#test coerces to the text
      // "undefined" -- that matches FORMULA and would never terminate.
      while (current < input.length && FORMULA.test(input[current])) {
        value += input[current]
        current++
      }
      tokens.push({ type: 'formula', value })
      continue
    }

    // Run of word characters (identifiers and numbers are not distinguished).
    // NOTE(review): \w also matches '_', so an underscore *inside* a run is
    // absorbed into the params token ("x_1" -> one token) instead of being
    // emitted as an operator. Kept as-is; confirm whether the parser relies on it.
    if (PARAMS.test(char)) {
      let value = char
      current++
      while (current < input.length && PARAMS.test(input[current])) {
        value += input[current]
        current++
      }
      tokens.push({ type: 'params', value })
      continue
    }

    // Anything unrecognised is passed through one character at a time.
    tokens.push({ type: 'other', value: char })
    current++
  }

  return tokens
}
// const result = tokenizer(latexCode)
// console.log(latexCode)
// console.log(result)
// Expose the tokenizer function as this module's CommonJS export.
module.exports = tokenizer