-
Notifications
You must be signed in to change notification settings - Fork 3
/
yaml.pgx
350 lines (287 loc) · 8.83 KB
/
yaml.pgx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
#------------------------------------------------------------------------------
# Pegex Grammar for YAML 1.2
#
# This is a PEG (top-down) grammar for the YAML 1.2 language. It is in the
# Pegex format, and can be used to construct a YAML parser in any language
# to which Pegex has been ported. (Currently Perl, Ruby and JavaScript.)
#
# Compared to the official YAML spec, this grammar should be much easier to
# read and understand. It will also be fully documented, and will attempt to
# have a test suite that exercises every rule path.
#
# The overall intent of this is to have one working grammar that backs up a
# full YAML framework implementation in every programming language where YAML
# is used. If this is achieved, then a bug in YAML can be fixed in one place,
# for every language's implementation.
#
# This grammar will go further than just parsing correct YAML. It will also
# parse for common YAML errors, and try to report the most useful error
# messages.
#------------------------------------------------------------------------------
# Notes:
# - Indentation will be done with indent / ondent / undent
# - Need to check some rules against spec for accuracy.
# - Make the grammar as strict as possible until justified.
# - Need to look for common errors in the grammar, and report them.
# - Need to have tests for known errors.
# Grammar metadata: the name of this grammar and its version.
%grammar yaml
%version 0.0.1
#------------------------------------------------------------------------------
# High Level Constructs
#------------------------------------------------------------------------------
# Top-level rule covering the whole input text: the stream-start trigger,
# zero or more documents, then the stream-end trigger.
yaml-stream: stream-start yaml-document* stream-end
# A YAML Document is a single node of any kind. It may start with an optional
# explicit head marker, and may be terminated with an optional explicit foot
# marker.
#
# Order of matching:
#   1. An optional %YAML directive, then any number of %TAG directives.
#   2. Either an explicit document-head followed by (blanks and a block scalar,
#      or whitespace and a node), or the implicit document-start followed by
#      a node.
#   3. The '+' whitespace rule, then an explicit document-foot or, failing
#      that, the implicit (empty) document-end.
# Note that a standalone '+' below is the whitespace rule (ws2), not a
# quantifier.
yaml-document:
directive-yaml?
directive-tag*
(
document-head (
/ BLANK+/ block-scalar + |
+ yaml-node
)
| document-start
yaml-node
)
+
(document-foot | document-end)
# Any YAML node other than an alias: optional node properties, the '+'
# whitespace rule, then either a flow collection or a block node.
# The lookahead for '{' or '[' in front of flow-collection is an assertion
# optimization: the flow branch is only attempted when one of those
# characters is next.
yaml-node:
  yaml-props?
  /+/
  (
    /(=[ LCURLY LSQUARE])/ flow-collection |
    block-node
  )
# Any YAML node, alias included. The alias is tried first, then yaml-node.
any-node: yaml-alias | yaml-node
#------------------------------------------------------------------------------
# Block Constructs
#------------------------------------------------------------------------------
# All block node kinds, tried in this order: a sequence, a mapping-or-scalar,
# and finally a plain block scalar.
block-node:
  block-sequence |
  block-mapping-or-scalar |
  block-scalar
# A block sequence is an indented run of entries, each introduced by a dash.
# It is bracketed by block-sequence-indent and block-sequence-undent; the one
# or more entries are separated by the '+' whitespace rule followed by
# block-sequence-ondent.
# NOTE(review): block-sequence-undent is neither defined in this file nor
# listed among the rules implemented in the Grammar class -- confirm where it
# is defined.
block-sequence:
  block-sequence-indent
    block-sequence-entry+ % (+ block-sequence-ondent)
  block-sequence-undent
# One entry of a block sequence: the sequence marker (dash) followed by any
# node, alias included.
block-sequence-entry: block-sequence-marker any-node
# A block mapping is an indented run of key/value pairs. It is bracketed by
# block-indent and block-undent; the one or more pairs are separated by the
# '+' whitespace rule followed by block-ondent.
block-mapping-or-scalar:
  block-indent
    # block-key-or-scalar
    block-pair+ % (+ block-ondent)
  block-undent
# One key/value pair of a block mapping: a block key (which includes the
# pair separator) followed by any node as the value.
block-pair: block-key any-node
# A block mapping key: optional node properties, a key scalar, then the pair
# separator. Key scalars are more limited than value scalars: only the quoted
# and plain forms are accepted (see block-key-scalar).
block-key: yaml-props? block-key-scalar pair-separator
# Scalar forms allowed as a block mapping key. Kept as its own rule so that
# it is dispatched to a different receiver method.
block-key-scalar:
  double-quoted-scalar |
  single-quoted-scalar |
  block-plain-scalar
# The 5 scalar forms available in block context, tried in this order:
# literal, folded, double quoted, single quoted, plain.
block-scalar:
  literal-scalar |
  folded-scalar |
  double-quoted-scalar |
  single-quoted-scalar |
  block-plain-scalar
#------------------------------------------------------------------------------
# Flow Constructs:
#------------------------------------------------------------------------------
# A flow node is either an alias, or optional node properties and the 'x'
# whitespace rule followed by one of 3 kinds: sequence, mapping or scalar.
flow-node:
  | yaml-alias
  | yaml-props? x ( flow-sequence | flow-mapping | flow-scalar )
# A flow collection: a flow sequence or, failing that, a flow mapping.
flow-collection: flow-sequence | flow-mapping
# A flow sequence: zero or more flow nodes between square brackets, separated
# by commas. The '%%' separator operator permits a trailing comma.
flow-sequence:
  flow-sequence-start
    flow-node* %% list-separator
  flow-sequence-end
# A flow mapping: zero or more key/value pairs between curly braces,
# separated by commas. The '%%' separator operator permits a trailing comma.
flow-mapping:
  flow-mapping-start
    flow-mapping-pair* %% list-separator
  flow-mapping-end
# Matches either a json-key followed by a flow-node, or the literal text
# 'xxx'.
# NOTE(review): no other rule visible in this file references flow-entry, and
# the 'xxx' alternative looks like a placeholder -- confirm whether this rule
# is still needed.
flow-entry:
| json-key flow-node
| 'xxx'
# The 3 scalar forms available in flow context, tried in this order:
# double quoted, single quoted, plain.
flow-scalar:
  double-quoted-scalar |
  single-quoted-scalar |
  flow-plain-scalar
# One key/value pair of a flow mapping. Both key and value must use flow
# syntax. The key is either a json-key (which already includes its colon) or
# any flow node followed by the pair separator; the value is any flow node.
flow-mapping-pair:
  (
    | json-key
    | flow-node pair-separator
  )
  flow-node
# A JSON-style key: a double quoted scalar immediately followed by a colon,
# with a lookahead requiring that the next character is not whitespace.
json-key: / double-quoted-scalar COLON (= NS) /
# Starting and ending rules for flow collections.
# An opening bracket/brace is followed by the 'x' whitespace rule; a closing
# one is preceded by 'x' and followed by the '-' whitespace rule.
flow-sequence-start: / '[' x/
flow-sequence-end: / x ']' -/
flow-mapping-start: / '{' x/
flow-mapping-end: / x '}' -/
#------------------------------------------------------------------------------
# Scalar Constructs
#------------------------------------------------------------------------------
# Literal scalar.
# literal-scalar: # This rule is written in code in the Grammar class.
# It needs access to the indent level.
# Folded scalar.
# folded-scalar: # This rule is written in code in the Grammar class.
# It needs access to the indent level.
# Double quoted scalar.
# The captured body is a run of either a backslash escape pair (a backslash
# plus whatever single character follows it, newline included) or any
# character that is neither a double quote nor a backslash. Escapes are
# consumed pairwise so that an escaped backslash directly before the closing
# quote (as in "a\\") is never mistaken for an escaped quote, which would
# otherwise let the match run on to a later double quote.
double-quoted-scalar: / DOUBLE ((: BACK ALL | [^ DOUBLE BACK])*) DOUBLE /
# Single quoted scalar.
# The captured body is a run of either two consecutive single quotes or any
# character other than a single quote.
single-quoted-scalar: / SINGLE ((: SINGLE SINGLE | [^ SINGLE])*) SINGLE /
# Plain (unquoted) scalars can't start with syntax chars, and can't contain
# colon+space.
# Structure of the regex:
#   - a negative lookahead rejecting a char-non-start first character;
#   - a non-greedy capture of the scalar text (ANY does not cross a newline);
#   - the '-' whitespace rule (trailing blanks and comment);
#   - a lookahead requiring colon+whitespace, an end of line, or the end of
#     the stream. Note that the check is COLON WS, i.e. any whitespace
#     character after the colon, not only a space.
block-plain-scalar: /
(! char-non-start)
( ANY*? )
-
(= COLON WS | EOL | EOS)
/
# Plain (unquoted) scalars in flow context are more restrictive than in block
# context.
# Same shape as block-plain-scalar, but the terminating lookahead also stops
# at any chars-syntax character or a comma, and requires colon+SPACE rather
# than colon+whitespace.
# NOTE(review): the COMMA SPACE alternative looks redundant, since COMMA is
# already in the character class of the first alternative -- confirm.
flow-plain-scalar: /
(! char-non-start)
( ANY*? )
-
(= [ chars-syntax COMMA ] | COLON SPACE | COMMA SPACE | EOL | EOS)
/
#------------------------------------------------------------------------------
# Indent Constructs:
#------------------------------------------------------------------------------
# block-indent: # This rule is written in code in the Grammar class.
# block-ondent: # This rule is written in code in the Grammar class.
# block-undent: # This rule is written in code in the Grammar class.
# block-sequence-indent: # This rule is written in code in the Grammar class.
# block-sequence-ondent: # This rule is written in code in the Grammar class.
#------------------------------------------------------------------------------
# Other Constructs:
#------------------------------------------------------------------------------
# Trigger stream events:
# stream-start consumes the '+' whitespace rule (leading blank and comment
# lines); stream-end matches only at the end of the input.
stream-start: /+/
stream-end: EOS
# The %YAML directive: the literal '%YAML', an asserted blank, the '-'
# whitespace rule, the literal version '1.2', then the '+' whitespace rule.
# Only version 1.2 is accepted.
directive-yaml: /
'%YAML' B - '1.2' +
/
# The %TAG directive: the literal '%TAG' and blanks, then two captures:
#   1. the text between two '!' characters (greedy, may be empty);
#   2. after more blanks, a run of one or more non-whitespace characters.
# The '+' whitespace rule is consumed at the end.
directive-tag: /
'%TAG' B -
BANG ( ANY*) BANG B -
( NS+ )
+
/
# A YAML header is 3 dashes followed by spaces or a newline.
# This rule matches only the 3 dashes; what must follow them is checked by
# the yaml-document rule.
document-head: '---'
# Implicit document start: a zero-width assertion that at least one more
# character (ANY, so not a newline) is available.
document-start: / (= ANY) /
# A YAML footer is 3 dots followed by a newline, or by the end of the stream
# (see the eol rule):
document-foot: / '...' eol /
# Implicit document ending: matches the empty string, so it always succeeds.
document-end: ''
# A node's properties are an anchor and / or tag in any order.
# Either an anchor with an optional tag, or a tag with an optional anchor,
# followed by a captured run of the '-' and '+' whitespace rules.
yaml-props: /
(: yaml-anchor yaml-tag?
| yaml-tag yaml-anchor?
) ( - + )
/
# An explicit node tag: captures '!' plus any following non-whitespace
# characters, then consumes the '-' whitespace rule.
yaml-tag: /(:('!' NS*) -)/
# A Node Anchor is a name for a node. Like '&this'.
# Captures the word characters after the '&' (without the '&' itself).
yaml-anchor: /(:'&' ( WORD+) -)/
# A Node Alias is a reference to an anchored node. Like '*this'.
# Captures the word characters after the '*' (without the '*' itself).
yaml-alias: /'*' ( WORD+) -/
# The marker that introduces a block sequence entry: a dash followed either
# by an asserted blank and the '-' whitespace rule, or by the '+' whitespace
# rule.
block-sequence-marker: /
DASH (: B - | + )
/
# Mapping key / value is always separated by ': ' (colon + space).
# The rule allows the '-' whitespace rule before the colon, asserts (without
# consuming) that a whitespace character follows the colon, then consumes the
# '-' whitespace rule. The assertion uses WS, so any whitespace character
# satisfies it, not only a space.
pair-separator: /- ':' (= WS) -/
# List items separated by ',' (comma), with the 'x' whitespace rule allowed on
# both sides of the comma.
list-separator: / x ',' x/
# List of single chars that are YAML syntax (and thus must be avoided in
# various contexts).
# This is a bare list of characters; the rules that reference it in this file
# place it inside a character class.
chars-syntax: /
AMP
STAR
BANG
LCURLY
RCURLY
LSQUARE
RSQUARE
PERCENT
DOUBLE
SINGLE
/
# YAML's Reserved Chars: the grave accent and the at sign.
# Like chars-syntax, this is a bare character list; char-non-start places it
# inside a character class.
chars-reserved: /
GRAVE
AT
/
# Plain scalar can't start with any of these: a character class combining the
# syntax chars, the reserved chars and the hash (comment) character.
char-non-start: /[
chars-syntax
chars-reserved
HASH
]/
#------------------------------------------------------------------------------
# Whitespace Rules:
#------------------------------------------------------------------------------
# Definition of the '-' rule.
# Ignore whitespace and comment up to EOL:
# zero or more blanks, then an optional comment. The comment's '#' must be
# preceded (lookbehind) by a whitespace character or by the position that '^'
# matches; the comment runs through the rest of the line (ANY does not cross
# a newline).
ws1: /
(:
BLANK*
(:
(: (?<= WS) | (?<= ^) )
HASH ANY*
)?
)
/
# Definition of the '+' rule.
# Ignore (possibly multiple) comment lines:
# zero or more repetitions of the '-' rule followed by an end of line. Since
# zero repetitions are allowed, this rule can match the empty string.
ws2: /(: - eol)*/
# Assert blank (space or tab) without consuming it:
B: /(= BLANK)/
# 'x' is ws rule to eat blanks after '+' (ws2): it is the '+' rule followed
# by the '-' rule.
x: /+ -/
# YAML might not have a final newline, so the end of the stream also counts
# as an end of line:
eol: / (: EOL | EOS ) /
# Make sure special 'ws' rule is never used: it is overridden here to match
# only the literal text 'XXX'.
ws: 'XXX'
# Vim Helpers, until we get `pegex.vim` mode.
# vim: set lisp sw=2: