-
Notifications
You must be signed in to change notification settings - Fork 15
/
JSON.scala
247 lines (209 loc) · 7.98 KB
/
JSON.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
/* Copyright 2020 EPFL, Lausanne
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package example.json
import scala.language.implicitConversions
import scallion._
import silex._
/* In this example, we show a lexer and parser for JSON. */
// First, we define the tokens of our language.
/** A lexical token of the JSON language.
  *
  * The hierarchy is sealed and every variant is `final`, so the set of
  * token shapes is closed.
  */
sealed abstract class Token {
  /** Indices at which the token starts (inclusive) and ends (exclusive). */
  val range: (Int, Int)
}

/** A single separator character, stored in `value`. */
final case class SeparatorToken(value: Char, range: (Int, Int)) extends Token

/** A boolean literal. */
final case class BooleanToken(value: Boolean, range: (Int, Int)) extends Token

/** A numeric literal, stored as a `Double`. */
final case class NumberToken(value: Double, range: (Int, Int)) extends Token

/** A string literal; `value` holds its textual content. */
final case class StringToken(value: String, range: (Int, Int)) extends Token

/** The `null` literal. */
final case class NullToken(range: (Int, Int)) extends Token

/** A run of whitespace. */
final case class SpaceToken(range: (Int, Int)) extends Token

/** Input that could not be recognised; `content` holds the offending text. */
final case class UnknownToken(content: String, range: (Int, Int)) extends Token
// Then, we define the lexer.
// The lexer converts sequences of characters into tokens.
/** Lexer for JSON: turns a stream of characters into a stream of tokens.
  *
  * Positions are plain `Int` indices into the input.
  */
object JSONLexer extends Lexers with CharLexers {

  type Token = example.json.Token
  type Position = Int

  // Each rule pairs a regular expression with a function that builds the
  // token from the matched characters and the range they cover.
  val lexer = Lexer(
    // Separators: exactly one of the structural characters.
    oneOf("[]{},:")
      |> { (chars, span) => SeparatorToken(chars.head, span) },

    // Whitespace: one or more blank characters.
    many1(whiteSpace)
      |> { (_, span) => SpaceToken(span) },

    // Boolean literals.
    word("true")
      |> { (_, span) => BooleanToken(true, span) },
    word("false")
      |> { (_, span) => BooleanToken(false, span) },

    // The null literal.
    word("null")
      |> { (_, span) => NullToken(span) },

    // Strings: a quote, then any number of plain characters or escape
    // sequences, then a closing quote.
    elem('"') ~
      many {
        elem(c => !(c == '"' || c == '\\' || c.isControl)) |
        elem('\\') ~ (oneOf("\"\\/bfnrt") | elem('u') ~ hex.times(4))
      } ~
      elem('"')
      |> { (chars, span) =>
        val quoted = chars.mkString
        // Strip the surrounding quotes. Escape sequences are kept as
        // written; they are not decoded here.
        StringToken(quoted.drop(1).dropRight(1), span)
      },

    // Numbers: optional sign, integer part, optional fraction,
    // optional exponent.
    opt {
      elem('-')
    } ~
      {
        elem('0') |
        nonZero ~ many(digit)
      } ~
      opt {
        elem('.') ~ many1(digit)
      } ~
      opt {
        oneOf("eE") ~
        opt(oneOf("+-")) ~
        many1(digit)
      }
      |> { (chars, span) => NumberToken(chars.mkString.toDouble, span) }
  ) onError {
    // When no regular expression matches, we return this token.
    (chars, span) => UnknownToken(chars.mkString, span)
  }

  /** Converts an iterator over characters into an iterator over tokens.
    *
    * Whitespace tokens are dropped from the result.
    */
  def apply(it: Iterator[Char]): Iterator[Token] = {
    // Wrap the iterator in a `Source`, which keeps track of positions.
    val positioned = Source.fromIterator(it, IndexPositioner)

    // The spawn method creates a new thread for the lexer
    // so that it can run in parallel with the parser.
    val produced = lexer.spawn(positioned)

    // Keep everything except spaces.
    produced.filter {
      case SpaceToken(_) => false
      case _ => true
    }
  }
}
// Then, we define classes of tokens.
// Each class corresponds to a group of tokens, and
// abstracts away all irrelevant details.
// In the parser, tokens can be accepted or rejected only based
// on their token class.
/** A class (kind) of tokens.
  *
  * @param repr the text returned by `toString` for this class
  */
sealed abstract class TokenClass(repr: String) {
  override def toString: String = repr
}

/** Class of the separator token carrying exactly the character `value`. */
final case class SeparatorClass(value: Char) extends TokenClass(value.toString)

/** Class of boolean tokens. */
case object BooleanClass extends TokenClass("<boolean>")

/** Class of number tokens. */
case object NumberClass extends TokenClass("<number>")

/** Class of string tokens. */
case object StringClass extends TokenClass("<string>")

/** Class of the null token. */
case object NullClass extends TokenClass("<null>")

/** Class printed as `<error>`, for tokens that belong to no other class. */
case object NoClass extends TokenClass("<error>")
// Then, we define JSON values, which will be the output of the parser.
/** A JSON value.
  *
  * The hierarchy is sealed and every variant is `final`, so the set of
  * value shapes is closed.
  */
sealed abstract class Value {
  /** Pair of indices associated with this value. */
  val range: (Int, Int)
}

/** A JSON array with its elements in order. */
final case class ArrayValue(elems: Seq[Value], range: (Int, Int)) extends Value

/** A JSON object, kept as a sequence of key-value bindings. */
final case class ObjectValue(elems: Seq[(StringValue, Value)], range: (Int, Int)) extends Value

/** A JSON boolean. */
final case class BooleanValue(value: Boolean, range: (Int, Int)) extends Value

/** A JSON number. */
final case class NumberValue(value: Double, range: (Int, Int)) extends Value

/** A JSON string. */
final case class StringValue(value: String, range: (Int, Int)) extends Value

/** The JSON `null` value. */
final case class NullValue(range: (Int, Int)) extends Value
// Then, we define the JSON Parser.
/** Parser for JSON, built on the `Parsers` trait.
  *
  * Tokens are grouped by `TokenClass`; the syntax only ever inspects a
  * token's class when deciding whether to accept it.
  */
object JSONParser extends Parsers {

  type Token = example.json.Token
  type Kind = TokenClass

  import Implicits._

  /** Assigns a single token class to each token.
    *
    * Tokens without a dedicated class are mapped to `NoClass`.
    */
  override def getKind(token: Token): TokenClass = token match {
    case SeparatorToken(sep, _) => SeparatorClass(sep)
    case _: BooleanToken => BooleanClass
    case _: NumberToken => NumberClass
    case _: StringToken => StringClass
    case _: NullToken => NullClass
    case _ => NoClass
  }

  /** Syntax for booleans: accepts a token of class `BooleanClass`
    * and turns it into a `BooleanValue`.
    */
  val booleanValue: Syntax[Value] = accept(BooleanClass) {
    case BooleanToken(flag, span) => BooleanValue(flag, span)
  }

  /** Syntax for numbers. */
  val numberValue: Syntax[Value] = accept(NumberClass) {
    case NumberToken(number, span) => NumberValue(number, span)
  }

  /** Syntax for strings. Typed as `StringValue` so it can serve as an object key. */
  val stringValue: Syntax[StringValue] = accept(StringClass) {
    case StringToken(text, span) => StringValue(text, span)
  }

  /** Syntax for null. */
  val nullValue: Syntax[Value] = accept(NullClass) {
    case NullToken(span) => NullValue(span)
  }

  /** Implicit conversion from a single char to the syntax
    * accepting the corresponding separator token.
    */
  implicit def separator(char: Char): Syntax[Token] = elem(SeparatorClass(char))

  /** Syntax for arrays: comma-separated values between square brackets.
    * The resulting range runs from the opening to the closing bracket.
    */
  lazy val arrayValue: Syntax[Value] =
    ('[' ~ repsep(value, ',') ~ ']').map {
      case open ~ items ~ close =>
        ArrayValue(items, (open.range._1, close.range._2))
    }

  /** Syntax for a key-value binding: a string, a colon, then a value. */
  lazy val binding: Syntax[(StringValue, Value)] =
    (stringValue ~ ':' ~ value).map {
      case key ~ _ ~ bound => (key, bound)
    }

  /** Syntax for objects: comma-separated bindings between curly braces.
    * The resulting range runs from the opening to the closing brace.
    */
  lazy val objectValue: Syntax[Value] =
    ('{' ~ repsep(binding, ',') ~ '}').map {
      case open ~ bindings ~ close =>
        ObjectValue(bindings, (open.range._1, close.range._2))
    }

  /** The complete syntax for JSON.
    * The `recursive` combinator is used since the syntax refers to itself.
    */
  lazy val value: Syntax[Value] = recursive {
    oneOf(
      arrayValue,
      objectValue,
      booleanValue,
      numberValue,
      // Upcast the produced value from `StringValue` to `Value`.
      stringValue.up[Value],
      nullValue)
  }

  // Creates the parser from the syntax.
  // NOTE(review): the meaning of the `false` argument is not visible in this
  // file; confirm it against the scallion version in use.
  val parser = Parser(value, false)

  /** Turns an iterator of tokens into a value, if possible.
    *
    * @return `Right` with the parsed value on success, otherwise `Left`
    *         with a message describing the unexpected token or the
    *         unexpected end of input
    */
  def apply(it: Iterator[Token]): Either[String, Value] = parser(it) match {
    // The parse was successful.
    case Parsed(result, _) =>
      Right(result)

    // The parse failed on a wrong token.
    case UnexpectedToken(token, rest) =>
      val expected = rest.first.mkString(", ")
      Left(s"Unexpected $token, expected one of $expected")

    // The parse failed because the input ended too early.
    case UnexpectedEnd(rest) =>
      val completion = Enumerator(rest.syntax).next().mkString("")
      Left("Unexpected end of input. Quickest way to end is \"" + completion + "\"")
  }
}
/** Demo entry point: lexes and parses a few JSON strings and prints the results. */
object JSON {

  /** Runs `input` through `JSONLexer` and `JSONParser` and prints the outcome. */
  private def parseAndPrint(input: String): Unit =
    println(JSONParser(JSONLexer(input.iterator)))

  /** Prints the parse result of several valid, then several invalid, inputs.
    *
    * @param args command-line arguments (unused)
    */
  // Explicit `: Unit =` replaces the procedure syntax `def main(...) { ... }`.
  def main(args: Array[String]): Unit = {
    println("Parsing some valid JSON example strings.")
    parseAndPrint("""{"foo":"bar", "baz":null}""")
    parseAndPrint("""[1, [true, false, {}], [3, [], [5, 6]]]""")

    println("Parsing some invalid JSON example strings.")
    parseAndPrint("""{1:2}""")
    parseAndPrint("""[1, 2 3]""")
    parseAndPrint("""[1, 2, 3}""")
    parseAndPrint("""[1, {"foo": [{"bar" """)
  }
}