forked from sile-typesetter/sile
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sil.abnf
124 lines (107 loc) · 4.31 KB
/
sil.abnf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
; Formal grammar specification for SIL (SILE Input Language) files
;
; Uses RFC 5234 (Augmented BNF for Syntax Specifications: ABNF)
; Uses RFC 7405 (Case-Sensitive String Support in ABNF)
; IMPORTANT CAVEAT:
; Backus-Naur Form grammars (like ABNF and EBNF) do not have a way to
; express matching opening and closing tags. The grammar below does
; not express SILE's ability to skip over passthrough content until
; it hits the matching closing tag for environments.
; Entry point. A master document holds a single top level content item, but
; fragments loaded on their own may hold any number of them, so the grammar
; accepts zero or more content items at the top level.
document = *content

; One content item is exactly one of the following constructs.
content = environment
        / comment
        / text
        / braced-content
        / command
; Environments come in two flavors, passthrough (raw) and regular. The
; difference is what is allowed to terminate them and what escapes are needed
; for the content in the middle.
;
; Continuation lines of a rule must begin with whitespace (RFC 5234, c-wsp);
; an unindented line would be read as the start of a new rule.

; Passthrough flavor: the body is raw text that is not parsed as SIL.
environment =  %s"\begin" [ options ] "{" passthrough-command-id "}"
               env-passthrough-text
               %s"\end{" passthrough-command-id "}"
;                        ^^^^^^^^^^^^^^^^^^^^^^
; End command must match id used in begin, see caveat at top

; Regular flavor: the body is any sequence of content items (possibly none),
; e.g. text interleaved with commands and nested environments.
environment =/ %s"\begin" [ options ] "{" command-id "}"
               *content
               %s"\end{" command-id "}"
;                        ^^^^^^^^^^
; End command must match id used in begin, see caveat at top
; Passthrough (raw) environments can have any valid UTF-8 except the closing
; delimiter matching the opening, per the environment rule. The rule below is
; written as "any UTF-8 at all"; the stop-at-closing-tag behaviour cannot be
; expressed in ABNF.
env-passthrough-text = *utf8-char
; A comment runs from a percent sign through the end of the line.
; The line ending is part of the comment, so a comment eats its newline!
; NOTE(review): utf8-char also matches CR and LF, so this rule alone does not
; pin the comment to the first line ending -- presumably that is the intent.
; NOTE(review): CRLF (RFC 5234 core rule) is CR followed by LF; confirm
; whether a bare LF is also meant to terminate a comment.
comment = "%" *utf8-char CRLF
; A run of ordinary characters (and backslash-escaped special characters, see
; text-char) with no special meaning. The repetition allows the empty string.
text = *text-char
; Input content wrapped in braces can be attached to a command or used to
; manually isolate chunks of content (e.g. to hinder ligatures).
; The braces may enclose any sequence of content items (possibly none), so
; that text, commands and nested groups can be mixed inside one group.
braced-content = "{" *content "}"
; A command is a backslash, a command name, then an optional option list and
; an optional braced argument. As with environments, the name decides how the
; argument is read: raw text for passthrough commands, SIL content otherwise.
command = "\" ( passthrough-command-id [ options ] [ braced-passthrough-text ]
              / command-id [ options ] [ braced-content ] )
; Passthrough (raw) command text can have any valid UTF-8 except an unbalanced
; closing delimiter. Balanced brace pairs are allowed and handled by recursion.
; Continuation lines are indented so that they stay part of the rule
; (RFC 5234 requires leading whitespace on continuation lines).
braced-passthrough-text = "{"
                          *( braced-passthrough-text / braced-passthrough-char )
                          "}"

; Any UTF-8 character other than the two brace characters.
braced-passthrough-char =  %x00-7A ; omit {
braced-passthrough-char =/ %x7C ; omit }
braced-passthrough-char =/ %x7E-7F ; end of utf8-1
braced-passthrough-char =/ utf8-2
braced-passthrough-char =/ utf8-3
braced-passthrough-char =/ utf8-4
; Options are a square-bracketed, comma-separated list of one or more
; parameters.
options = "[" parameter *( "," parameter ) "]"
; A parameter is a key, an equals sign and a quoted or bare value. Optional
; whitespace is allowed before the key, around the equals sign and after the
; value.
parameter = *WSP identifier *WSP "=" *WSP ( quoted-value / value ) *WSP
; A quoted value is wrapped in double quotes. Inside the quotes any UTF-8
; character may appear; a double quote itself must be written as \".
quoted-value = DQUOTE *quoted-value-char DQUOTE

quoted-value-char = "\" %x22 ; escaped "
                  / %x00-21 ; omit "
                  / %x23-7F ; end of utf8-1
                  / utf8-2
                  / utf8-3
                  / utf8-4
; A bare (unquoted) value is any run of UTF-8 characters other than the three
; that would end it: a double quote, a comma (parameter separator) and a
; closing square bracket (end of the option list).
; The ASCII ranges below tile %x00-7F with exactly those three code points
; (%x22, %x2C, %x5D) removed, so digits, "-", "." and ":" are valid in values.
; NOTE(review): if the parser also accepts ";" as a parameter separator, %x3B
; would need to be excluded here and added to the options rule -- confirm.
value = *value-char
value-char =  %x00-21 ; omit "
value-char =/ %x23-2B ; omit ,
value-char =/ %x2D-5C ; omit ]
value-char =/ %x5E-7F ; end of utf8-1
value-char =/ utf8-2
value-char =/ utf8-3
value-char =/ utf8-4
; One character of ordinary text: either a backslash escape for one of the
; four special characters, or any UTF-8 character that is not special.
text-char = "\" ( %x5C / %x25 / %x7B / %x7D ) ; escaped \ % { }
          / %x00-24 ; omit %
          / %x26-5B ; omit \
          / %x5D-7A ; omit {
          / %x7C ; omit }
          / %x7E-7F ; end of utf8-1
          / utf8-2
          / utf8-3
          / utf8-4
; Identifiers are used for command names and parameter keys. They start with
; an ASCII letter, underscore or colon and continue with any of those, digits,
; hyphens or dots.
letter = ALPHA / "_" / ":"
identifier = letter *( letter / DIGIT / "-" / "." )

; Names of the commands and environments whose content is passed through raw
; instead of being parsed as SIL. Matched case-sensitively (RFC 7405 %s).
; Continuation lines are indented so that they stay part of the rule
; (RFC 5234 requires leading whitespace on continuation lines).
passthrough-command-id = %s"ftl"
                       / %s"lua"
                       / %s"math"
                       / %s"raw"
                       / %s"script"
                       / %s"sil"
                       / %s"use"
                       / %s"xml"

; Any command or environment name.
; NOTE(review): identifier also matches every passthrough id above, so the
; grammar is ambiguous for those names; presumably the passthrough reading is
; meant to win -- confirm against the parser.
command-id = identifier
; ASCII isn't good enough for us.
; One well-formed UTF-8 encoded character, by encoded length in bytes. The
; lead/second byte ranges exclude overlong forms (E0 80-9F, F0 80-8F), the
; surrogate block (ED A0-BF) and anything above U+10FFFF (F4 90 and up).
; Continuation lines are indented so that they stay part of the rule
; (RFC 5234 requires leading whitespace on continuation lines).
utf8-char = utf8-1 / utf8-2 / utf8-3 / utf8-4
utf8-1 = %x00-7F
utf8-2 = %xC2-DF utf8-tail
utf8-3 = %xE0 %xA0-BF utf8-tail
       / %xE1-EC 2utf8-tail
       / %xED %x80-9F utf8-tail
       / %xEE-EF 2utf8-tail
utf8-4 = %xF0 %x90-BF 2utf8-tail
       / %xF1-F3 3utf8-tail
       / %xF4 %x80-8F 2utf8-tail
; Continuation byte shared by all multi-byte forms.
utf8-tail = %x80-BF