(* Source: bibtex_lexer.mll (159 lines, 139 loc, 4.11 KB), from a fork of backtracking/bibtex2html. *)
(**************************************************************************)
(* bibtex2html - A BibTeX to HTML translator *)
(* Copyright (C) 1997-2014 Jean-Christophe Filliâtre and Claude Marché *)
(* *)
(* This software is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU General Public *)
(* License version 2, as published by the Free Software Foundation. *)
(* *)
(* This software is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *)
(* *)
(* See the GNU General Public License version 2 for more details *)
(* (enclosed in the file GPL). *)
(**************************************************************************)
(*i $Id: bibtex_lexer.mll,v 1.19 2010-02-22 07:38:19 filliatr Exp $ i*)
(*s Lexer for BibTeX files. *)
{
open Lexing
open Bibtex_parser
let serious = ref false (* if we are inside a command or not *)
let brace_depth = ref 0
(*s To buffer string literals *)
let buffer = Buffer.create 8192
let reset_string_buffer () =
Buffer.reset buffer
let store_string_char c =
Buffer.add_char buffer c
let get_stored_string () =
let s = Buffer.contents buffer in
Buffer.reset buffer;
s
let start_delim = ref ' '
let check_delim d = match !start_delim, d with
| '{', '}' | '(', ')' -> ()
| _ -> failwith "closing character does not match opening"
}
let space = [' ' '\t' '\r' '\n']
rule token = parse
| space +
{ token lexbuf }
| '@' space*
([^ ' ' '\t' '\n' '\r' '{' '(']+ as entry_type) space*
(('{' | '(') as delim) space*
{ serious := true;
start_delim := delim;
match String.lowercase_ascii entry_type with
| "string" ->
Tabbrev
| "comment" ->
reset_string_buffer ();
comment lexbuf;
serious := false;
Tcomment (get_stored_string ())
| "preamble" ->
Tpreamble
| et ->
Tentry (entry_type, key lexbuf)
}
| '=' { if !serious then Tequal else token lexbuf }
| '#' { if !serious then Tsharp else token lexbuf }
| ',' { if !serious then Tcomma else token lexbuf }
| '{' { if !serious then begin
reset_string_buffer ();
brace lexbuf;
Tstring (get_stored_string ())
end else
token lexbuf }
| ('}' | ')') as d
{ if !serious then begin
check_delim d;
serious := false;
Trbrace
end else
token lexbuf }
| [^ ' ' '\t' '\n' '\r' '{' '}' '(' ')' '=' '#' ',' '"' '@']+
{ if !serious then
Tident (Lexing.lexeme lexbuf)
else
token lexbuf }
| "\""
{ if !serious then begin
reset_string_buffer ();
string lexbuf;
Tstring (get_stored_string ())
end else
token lexbuf }
| eof { EOF }
| _ { token lexbuf }
and string = parse
| '{'
{ store_string_char '{';
brace lexbuf;
store_string_char '}';
string lexbuf
}
| '"'
{ () }
| "\\\""
{ store_string_char '\\';
store_string_char '"';
string lexbuf}
| eof
{ failwith "unterminated string" }
| _
{ let c = Lexing.lexeme_char lexbuf 0 in
store_string_char c;
string lexbuf }
and brace = parse
| '{'
{ store_string_char '{';
brace lexbuf;
store_string_char '}';
brace lexbuf
}
| '}'
{ () }
| eof
{ failwith "unterminated string" }
| _
{ let c = Lexing.lexeme_char lexbuf 0 in
store_string_char c;
brace lexbuf }
and key = parse
| [^ ' ' '\t' '\n' '\r' ',']+
{ lexeme lexbuf }
| eof
| _
{ raise Parsing.Parse_error }
and comment = parse
| '{'
{ comment lexbuf; comment lexbuf }
| [^ '}' '@'] as c
{ store_string_char c;
comment lexbuf }
| eof
{ () }
| _
{ () }