From 438b362104c8852085a912d5fa31800b0dc93aff Mon Sep 17 00:00:00 2001
From: David Sherret <dsherret@users.noreply.github.com>
Date: Sat, 19 Oct 2024 13:21:30 -0400
Subject: [PATCH] feat: improve error struct (#42)

---
 .github/workflows/ci.yml      |   8 +-
 .github/workflows/release.yml |   4 +-
 Cargo.toml                    |   2 +
 LICENSE                       |   2 +-
 README.md                     |   4 +
 dprint.json                   |   3 +-
 rust-toolchain.toml           |   2 +-
 src/errors.rs                 | 179 +++++++++++++++++++++++++++++-----
 src/parse_to_ast.rs           |  84 +++++++++-------
 src/parse_to_value.rs         |   8 +-
 src/scanner.rs                |  47 ++++-----
 src/serde.rs                  |   3 +-
 src/string.rs                 |   2 +-
 13 files changed, 250 insertions(+), 98 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 91b08fd..d8b1157 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,10 +30,10 @@ jobs:
       run: |
         cargo test --features serde
         cargo test --features preserve_order
-        cargo test --verbose --all-features
+        cargo test --all-features
     - name: Test release
       if: matrix.config.kind == 'test_release'
-      run: cargo test --release --verbose --all-features
+      run: cargo test --release --all-features
 
       # CARGO PUBLISH
     - name: Cargo login
@@ -48,14 +48,14 @@ jobs:
     name: Benchmarks
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Install latest nightly
       uses: actions-rs/toolchain@v1
       with:
         toolchain: nightly
         override: true
     - name: Cache cargo
-      uses: actions/cache@v2
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cargo/registry
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index dc71bc1..3bf4793 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -20,11 +20,11 @@ jobs:
 
     steps:
       - name: Clone repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           token: ${{ secrets.GH_DPRINTBOT_PAT }}
 
-      - uses: denoland/setup-deno@v1
+      - uses: denoland/setup-deno@v2
       - uses: dsherret/rust-toolchain-file@v1
 
       - name: Bump version and tag
diff --git a/Cargo.toml b/Cargo.toml
index 6f26a4c..8a8eb0f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,11 +13,13 @@ all-features = true
 [dependencies]
 indexmap = { version = "2.2.6", optional = true }
 serde_json = { version = "1.0", optional = true }
+unicode-width = { version = "0.2.0", optional = true }
 
 [features]
 cst = []
 preserve_order = ["indexmap", "serde_json/preserve_order"]
 serde = ["serde_json"]
+error_unicode_width = ["unicode-width"]
 
 [dev-dependencies]
 pretty_assertions = "1.0.0"
diff --git a/LICENSE b/LICENSE
index 5932947..3457cf1 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2020-2021 David Sherret
+Copyright (c) 2020 David Sherret
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 09bd781..97cf306 100644
--- a/README.md
+++ b/README.md
@@ -92,3 +92,7 @@ let json_value = parse_to_value(text, &ParseOptions {
   allow_trailing_commas: false,
 })?;
 ```
+
+## Error column number with unicode-width
+
+To get more accurate display column numbers in error messages, enable the `error_unicode_width` cargo feature, which will pull in and use the [unicode-width](https://crates.io/crates/unicode-width) dependency internally. Otherwise it will use the character count, which isn't as accurate, but will probably be good enough in most cases.
diff --git a/dprint.json b/dprint.json
index 6d82f1a..ec44d55 100644
--- a/dprint.json
+++ b/dprint.json
@@ -13,6 +13,7 @@
   ],
   "plugins": [
     "https://plugins.dprint.dev/markdown-0.17.8.wasm",
-    "https://plugins.dprint.dev/exec-0.5.0.json@8d9972eee71fa1590e04873540421f3eda7674d0f1aae3d7c788615e7b7413d0"
+    "https://plugins.dprint.dev/exec-0.5.0.json@8d9972eee71fa1590e04873540421f3eda7674d0f1aae3d7c788615e7b7413d0",
+    "https://plugins.dprint.dev/json-0.19.3.wasm"
   ]
 }
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index 251f956..87499a9 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,3 +1,3 @@
 [toolchain]
-channel = "1.81.0"
+channel = "1.82.0"
 components = ["clippy", "rustfmt"]
diff --git a/src/errors.rs b/src/errors.rs
index 68a65c9..e6de239 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -1,43 +1,161 @@
-use std::error::Error;
 use std::fmt;
 
+use crate::ParseStringErrorKind;
+
 use super::common::Range;
 
-/// Error that could occur while parsing or tokenizing.
-#[derive(Debug, PartialEq)]
-pub struct ParseError {
-  /// Start and end position of the error.
-  pub range: Range,
-  /// Error message.
-  pub message: String,
-  /// Message with the range text.
-  display_message: String,
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum ParseErrorKind {
+  CommentsNotAllowed,
+  ExpectedColonAfterObjectKey,
+  ExpectedObjectValue,
+  ExpectedDigit,
+  ExpectedDigitFollowingNegativeSign,
+  ExpectedPlusMinusOrDigitInNumberLiteral,
+  ExpectedStringObjectProperty,
+  MultipleRootJsonValues,
+  String(ParseStringErrorKind),
+  TrailingCommasNotAllowed,
+  UnexpectedCloseBrace,
+  UnexpectedCloseBracket,
+  UnexpectedColon,
+  UnexpectedComma,
+  UnexpectedToken,
+  UnexpectedTokenInObject,
+  UnexpectedWord,
+  UnterminatedArray,
+  UnterminatedCommentBlock,
+  UnterminatedObject,
 }
 
-impl ParseError {
-  pub(crate) fn new(range: Range, message: &str, file_text: &str) -> ParseError {
-    let display_message = get_message_with_range(range, message, file_text);
-    ParseError {
-      message: message.to_string(),
-      range,
-      display_message,
+impl std::fmt::Display for ParseErrorKind {
+  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    use ParseErrorKind::*;
+    match self {
+      CommentsNotAllowed => {
+        write!(f, "Comments are not allowed")
+      }
+      ExpectedColonAfterObjectKey => {
+        write!(f, "Expected colon after the string or word in object property")
+      }
+      ExpectedDigit => {
+        write!(f, "Expected digit")
+      }
+      ExpectedDigitFollowingNegativeSign => {
+        write!(f, "Expected digit following negative sign")
+      }
+      ExpectedPlusMinusOrDigitInNumberLiteral => {
+        write!(f, "Expected plus, minus, or digit in number literal")
+      }
+      ExpectedObjectValue => {
+        write!(f, "Expected value after colon in object property")
+      }
+      ExpectedStringObjectProperty => {
+        write!(f, "Expected string for object property")
+      }
+      MultipleRootJsonValues => {
+        write!(f, "Text cannot contain more than one JSON value")
+      }
+      String(kind) => kind.fmt(f),
+      TrailingCommasNotAllowed => {
+        write!(f, "Trailing commas are not allowed")
+      }
+      UnexpectedCloseBrace => {
+        write!(f, "Unexpected close brace")
+      }
+      UnexpectedCloseBracket => {
+        write!(f, "Unexpected close bracket")
+      }
+      UnexpectedColon => {
+        write!(f, "Unexpected colon")
+      }
+      UnexpectedComma => {
+        write!(f, "Unexpected comma")
+      }
+      UnexpectedWord => {
+        write!(f, "Unexpected word")
+      }
+      UnexpectedToken => {
+        write!(f, "Unexpected token")
+      }
+      UnexpectedTokenInObject => {
+        write!(f, "Unexpected token in object")
+      }
+      UnterminatedArray => {
+        write!(f, "Unterminated array")
+      }
+      UnterminatedCommentBlock => {
+        write!(f, "Unterminated comment block")
+      }
+      UnterminatedObject => {
+        write!(f, "Unterminated object")
+      }
     }
   }
 }
 
-impl fmt::Display for ParseError {
-  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-    write!(f, "{}", self.display_message)
+#[derive(Debug, Clone, PartialEq)]
+struct ParseErrorInner {
+  range: Range,
+  line_display: usize,
+  column_display: usize,
+  kind: ParseErrorKind,
+}
+
+/// Error that could occur while parsing or tokenizing.
+#[derive(Debug, Clone, PartialEq)]
+pub struct ParseError(Box<ParseErrorInner>);
+
+impl std::error::Error for ParseError {}
+
+impl ParseError {
+  pub(crate) fn new(range: Range, kind: ParseErrorKind, file_text: &str) -> ParseError {
+    let (line_display, column_display) = get_line_and_column_display(range, file_text);
+    ParseError(Box::new(ParseErrorInner {
+      range,
+      line_display,
+      column_display,
+      kind,
+    }))
+  }
+
+  /// Start and end position of the error.
+  pub fn range(&self) -> Range {
+    self.0.range
+  }
+
+  /// 1-indexed line number the error occurred on.
+  pub fn line_display(&self) -> usize {
+    self.0.line_display
+  }
+
+  /// 1-indexed column number the error occurred on.
+  ///
+  /// Note: Use the `error_unicode_width` feature to get the correct column
+  /// number for Unicode characters on the line, otherwise this is just the
+  /// number of characters by default.
+  pub fn column_display(&self) -> usize {
+    self.0.column_display
+  }
+
+  /// Kind of error that occurred.
+  pub fn kind(&self) -> &ParseErrorKind {
+    &self.0.kind
   }
 }
 
-impl Error for ParseError {
-  fn description(&self) -> &str {
-    &self.display_message
+impl fmt::Display for ParseError {
+  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    let inner = &*self.0;
+    write!(
+      f,
+      "{} on line {} column {}",
+      inner.kind, inner.line_display, inner.column_display
+    )
   }
 }
 
-fn get_message_with_range(range: Range, message: &str, file_text: &str) -> String {
+fn get_line_and_column_display(range: Range, file_text: &str) -> (usize, usize) {
   let mut line_index = 0;
   let mut column_index = 0;
   for c in file_text[..range.start].chars() {
@@ -45,8 +163,17 @@ fn get_message_with_range(range: Range, message: &str, file_text: &str) -> Strin
       line_index += 1;
       column_index = 0;
     } else {
-      column_index += 1;
+      #[cfg(feature = "error_unicode_width")]
+      {
+        if let Some(width) = unicode_width::UnicodeWidthChar::width_cjk(c) {
+          column_index += width;
+        }
+      }
+      #[cfg(not(feature = "error_unicode_width"))]
+      {
+        column_index += 1;
+      }
     }
   }
-  format!("{} on line {} column {}.", message, line_index + 1, column_index + 1,)
+  (line_index + 1, column_index + 1)
 }
diff --git a/src/parse_to_ast.rs b/src/parse_to_ast.rs
index c3cc3f8..7fd8510 100644
--- a/src/parse_to_ast.rs
+++ b/src/parse_to_ast.rs
@@ -132,17 +132,17 @@ impl<'a> Context<'a> {
     }
   }
 
-  pub fn create_error(&self, message: &str) -> ParseError {
-    self.scanner.create_error_for_current_token(message)
+  pub fn create_error(&self, kind: ParseErrorKind) -> ParseError {
+    self.scanner.create_error_for_current_token(kind)
   }
 
-  pub fn create_error_for_current_range(&mut self, message: &str) -> ParseError {
+  pub fn create_error_for_current_range(&mut self, kind: ParseErrorKind) -> ParseError {
     let range = self.end_range();
-    self.create_error_for_range(range, message)
+    self.create_error_for_range(range, kind)
   }
 
-  pub fn create_error_for_range(&self, range: Range, message: &str) -> ParseError {
-    self.scanner.create_error_for_range(range, message)
+  pub fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError {
+    self.scanner.create_error_for_range(range, kind)
   }
 
   fn scan_handling_comments(&mut self) -> Result<Option<Token<'a>>, ParseError> {
@@ -181,7 +181,7 @@ impl<'a> Context<'a> {
 
   fn handle_comment(&mut self, comment: Comment<'a>) -> Result<(), ParseError> {
     if !self.allow_comments {
-      return Err(self.create_error("Comments are not allowed"));
+      return Err(self.create_error(ParseErrorKind::CommentsNotAllowed));
     }
 
     if self.comments.is_some() {
@@ -236,7 +236,7 @@ pub fn parse_to_ast<'a>(
   let value = parse_value(&mut context)?;
 
   if context.scan()?.is_some() {
-    return Err(context.create_error("Text cannot contain more than one JSON value"));
+    return Err(context.create_error(ParseErrorKind::MultipleRootJsonValues));
   }
 
   debug_assert!(context.range_stack.is_empty());
@@ -258,11 +258,11 @@ fn parse_value<'a>(context: &mut Context<'a>) -> Result<Option<Value<'a>>, Parse
       Token::Boolean(value) => Ok(Some(Value::BooleanLit(create_boolean_lit(context, value)))),
       Token::Number(value) => Ok(Some(Value::NumberLit(create_number_lit(context, value)))),
       Token::Null => return Ok(Some(Value::NullKeyword(create_null_keyword(context)))),
-      Token::CloseBracket => Err(context.create_error("Unexpected close bracket")),
-      Token::CloseBrace => Err(context.create_error("Unexpected close brace")),
-      Token::Comma => Err(context.create_error("Unexpected comma")),
-      Token::Colon => Err(context.create_error("Unexpected colon")),
-      Token::Word(_) => Err(context.create_error("Unexpected word")),
+      Token::CloseBracket => Err(context.create_error(ParseErrorKind::UnexpectedCloseBracket)),
+      Token::CloseBrace => Err(context.create_error(ParseErrorKind::UnexpectedCloseBrace)),
+      Token::Comma => Err(context.create_error(ParseErrorKind::UnexpectedComma)),
+      Token::Colon => Err(context.create_error(ParseErrorKind::UnexpectedColon)),
+      Token::Word(_) => Err(context.create_error(ParseErrorKind::UnexpectedWord)),
       Token::CommentLine(_) => unreachable!(),
       Token::CommentBlock(_) => unreachable!(),
     },
@@ -285,8 +285,8 @@ fn parse_object<'a>(context: &mut Context<'a>) -> Result<Object<'a>, ParseError>
       Some(Token::Word(prop_name)) | Some(Token::Number(prop_name)) => {
         properties.push(parse_object_property(context, PropName::Word(prop_name))?);
       }
-      None => return Err(context.create_error_for_current_range("Unterminated object")),
-      _ => return Err(context.create_error("Unexpected token in object")),
+      None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedObject)),
+      _ => return Err(context.create_error(ParseErrorKind::UnexpectedTokenInObject)),
     }
 
     // skip the comma
@@ -294,7 +294,7 @@ fn parse_object<'a>(context: &mut Context<'a>) -> Result<Object<'a>, ParseError>
       let comma_range = context.create_range_from_last_token();
       if let Some(Token::CloseBrace) = context.scan()? {
         if !context.allow_trailing_commas {
-          return Err(context.create_error_for_range(comma_range, "Trailing commas are not allowed"));
+          return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
         }
       }
     }
@@ -320,14 +320,14 @@ fn parse_object_property<'a>(context: &mut Context<'a>, prop_name: PropName<'a>)
       if context.allow_loose_object_property_names {
         ObjectPropName::Word(create_word(context, prop_name))
       } else {
-        return Err(context.create_error("Expected string for object property"));
+        return Err(context.create_error(ParseErrorKind::ExpectedStringObjectProperty));
       }
     }
   };
 
   match context.scan()? {
     Some(Token::Colon) => {}
-    _ => return Err(context.create_error("Expected a colon after the string or word in an object property")),
+    _ => return Err(context.create_error(ParseErrorKind::ExpectedColonAfterObjectKey)),
   }
 
   context.scan()?;
@@ -339,7 +339,7 @@ fn parse_object_property<'a>(context: &mut Context<'a>, prop_name: PropName<'a>)
       name,
       value,
     }),
-    None => Err(context.create_error("Expected value after colon in object property")),
+    None => Err(context.create_error(ParseErrorKind::ExpectedObjectValue)),
   }
 }
 
@@ -353,10 +353,10 @@ fn parse_array<'a>(context: &mut Context<'a>) -> Result<Array<'a>, ParseError> {
   loop {
     match context.token() {
       Some(Token::CloseBracket) => break,
-      None => return Err(context.create_error_for_current_range("Unterminated array")),
+      None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
       _ => match parse_value(context)? {
         Some(value) => elements.push(value),
-        None => return Err(context.create_error_for_current_range("Unterminated array")),
+        None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
       },
     }
 
@@ -365,7 +365,7 @@ fn parse_array<'a>(context: &mut Context<'a>) -> Result<Array<'a>, ParseError> {
       let comma_range = context.create_range_from_last_token();
       if let Some(Token::CloseBracket) = context.scan()? {
         if !context.allow_trailing_commas {
-          return Err(context.create_error_for_range(comma_range, "Trailing commas are not allowed"));
+          return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
         }
       }
     }
@@ -416,51 +416,52 @@ fn create_null_keyword(context: &Context) -> NullKeyword {
 #[cfg(test)]
 mod tests {
   use super::*;
+  use pretty_assertions::assert_eq;
 
   #[test]
   fn it_should_error_when_has_multiple_values() {
     assert_has_error(
       "[][]",
-      "Text cannot contain more than one JSON value on line 1 column 3.",
+      "Text cannot contain more than one JSON value on line 1 column 3",
     );
   }
 
   #[test]
   fn it_should_error_when_object_is_not_terminated() {
-    assert_has_error("{", "Unterminated object on line 1 column 1.");
+    assert_has_error("{", "Unterminated object on line 1 column 1");
   }
 
   #[test]
   fn it_should_error_when_object_has_unexpected_token() {
-    assert_has_error("{ [] }", "Unexpected token in object on line 1 column 3.");
+    assert_has_error("{ [] }", "Unexpected token in object on line 1 column 3");
   }
 
   #[test]
   fn it_should_error_when_object_has_two_non_string_tokens() {
     assert_has_error(
       "{ asdf asdf: 5 }",
-      "Expected a colon after the string or word in an object property on line 1 column 8.",
+      "Expected colon after the string or word in object property on line 1 column 8",
     );
   }
 
   #[test]
   fn it_should_error_when_array_is_not_terminated() {
-    assert_has_error("[", "Unterminated array on line 1 column 1.");
+    assert_has_error("[", "Unterminated array on line 1 column 1");
   }
 
   #[test]
   fn it_should_error_when_array_has_unexpected_token() {
-    assert_has_error("[:]", "Unexpected colon on line 1 column 2.");
+    assert_has_error("[:]", "Unexpected colon on line 1 column 2");
   }
 
   #[test]
   fn it_should_error_when_comment_block_not_closed() {
-    assert_has_error("/* test", "Unterminated comment block on line 1 column 1.");
+    assert_has_error("/* test", "Unterminated comment block on line 1 column 1");
   }
 
   #[test]
   fn it_should_error_when_string_lit_not_closed() {
-    assert_has_error("\" test", "Unterminated string literal on line 1 column 1.");
+    assert_has_error("\" test", "Unterminated string literal on line 1 column 1");
   }
 
   fn assert_has_error(text: &str, message: &str) {
@@ -475,33 +476,34 @@ mod tests {
   fn strict_should_error_object_trailing_comma() {
     assert_has_strict_error(
       r#"{ "test": 5, }"#,
-      "Trailing commas are not allowed on line 1 column 12.",
+      "Trailing commas are not allowed on line 1 column 12",
     );
   }
 
   #[test]
   fn strict_should_error_array_trailing_comma() {
-    assert_has_strict_error(r#"[ "test", ]"#, "Trailing commas are not allowed on line 1 column 9.");
+    assert_has_strict_error(r#"[ "test", ]"#, "Trailing commas are not allowed on line 1 column 9");
   }
 
   #[test]
   fn strict_should_error_comment_line() {
-    assert_has_strict_error(r#"[ "test" ] // 1"#, "Comments are not allowed on line 1 column 12.");
+    assert_has_strict_error(r#"[ "test" ] // 1"#, "Comments are not allowed on line 1 column 12");
   }
 
   #[test]
   fn strict_should_error_comment_block() {
-    assert_has_strict_error(r#"[ "test" /* 1 */]"#, "Comments are not allowed on line 1 column 10.");
+    assert_has_strict_error(r#"[ "test" /* 1 */]"#, "Comments are not allowed on line 1 column 10");
   }
 
   #[test]
   fn strict_should_error_word_property() {
     assert_has_strict_error(
       r#"{ word: 5 }"#,
-      "Expected string for object property on line 1 column 3.",
+      "Expected string for object property on line 1 column 3",
     );
   }
 
+  #[track_caller]
   fn assert_has_strict_error(text: &str, message: &str) {
     let result = parse_to_ast(
       text,
@@ -559,4 +561,16 @@ mod tests {
     let comments = result.comments.unwrap();
     assert_eq!(comments.len(), 2); // for both positions, but it's the same comment
   }
+
+  #[cfg(not(feature = "error_unicode_width"))]
+  #[test]
+  fn error_correct_line_column_unicode_width() {
+    assert_has_strict_error(r#"["🧑‍🦰", ["#, "Unterminated array on line 1 column 9");
+  }
+
+  #[cfg(feature = "error_unicode_width")]
+  #[test]
+  fn error_correct_line_column_unicode_width() {
+    assert_has_strict_error(r#"["🧑‍🦰", ["#, "Unterminated array on line 1 column 10");
+  }
 }
diff --git a/src/parse_to_value.rs b/src/parse_to_value.rs
index 510d553..281e27b 100644
--- a/src/parse_to_value.rs
+++ b/src/parse_to_value.rs
@@ -57,6 +57,8 @@ fn handle_object(obj: ast::Object) -> JsonObject {
 
 #[cfg(test)]
 mod tests {
+  use crate::errors::ParseErrorKind;
+
   use super::*;
   use std::borrow::Cow;
 
@@ -147,8 +149,8 @@ mod tests {
     let err = parse_to_value("{\n  \"a\":\u{200b}5 }", &Default::default())
       .err()
       .unwrap();
-    assert_eq!(err.range.start, 8);
-    assert_eq!(err.range.end, 11);
-    assert_eq!(err.message, "Unexpected token");
+    assert_eq!(err.range().start, 8);
+    assert_eq!(err.range().end, 11);
+    assert_eq!(err.kind().clone(), ParseErrorKind::UnexpectedToken);
   }
 }
diff --git a/src/scanner.rs b/src/scanner.rs
index f16e19b..4f1e8a6 100644
--- a/src/scanner.rs
+++ b/src/scanner.rs
@@ -76,7 +76,7 @@ impl<'a> Scanner<'a> {
         '/' => match self.peek_char() {
           Some('/') => Ok(self.parse_comment_line()),
           Some('*') => self.parse_comment_block(),
-          _ => Err(self.create_error_for_current_token("Unexpected token")),
+          _ => Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken)),
         },
         _ => {
           if current_char == '-' || self.is_digit() {
@@ -120,15 +120,15 @@ impl<'a> Scanner<'a> {
     self.current_token.as_ref().map(|x| x.to_owned())
   }
 
-  pub(super) fn create_error_for_current_token(&self, message: &str) -> ParseError {
-    self.create_error_for_start(self.token_start, message)
+  pub(super) fn create_error_for_current_token(&self, kind: ParseErrorKind) -> ParseError {
+    self.create_error_for_start(self.token_start, kind)
   }
 
-  pub(super) fn create_error_for_current_char(&self, message: &str) -> ParseError {
-    self.create_error_for_start(self.byte_index, message)
+  pub(super) fn create_error_for_current_char(&self, kind: ParseErrorKind) -> ParseError {
+    self.create_error_for_start(self.byte_index, kind)
   }
 
-  pub(super) fn create_error_for_start(&self, start: usize, message: &str) -> ParseError {
+  pub(super) fn create_error_for_start(&self, start: usize, kind: ParseErrorKind) -> ParseError {
     let range = Range {
       start,
       end: if let Some(c) = self.file_text[self.byte_index..].chars().next() {
@@ -137,18 +137,18 @@ impl<'a> Scanner<'a> {
         self.file_text.len()
       },
     };
-    self.create_error_for_range(range, message)
+    self.create_error_for_range(range, kind)
   }
 
-  pub(super) fn create_error_for_range(&self, range: Range, message: &str) -> ParseError {
-    ParseError::new(range, message, self.file_text)
+  pub(super) fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError {
+    ParseError::new(range, kind, self.file_text)
   }
 
   fn parse_string(&mut self) -> Result<Token<'a>, ParseError> {
     crate::string::parse_string_with_char_provider(self)
       .map(Token::String)
       // todo(dsherret): don't convert the error kind to a string here
-      .map_err(|err| self.create_error_for_start(err.byte_index, &err.kind.to_string()))
+      .map_err(|err| self.create_error_for_start(err.byte_index, ParseErrorKind::String(err.kind)))
   }
 
   fn parse_number(&mut self) -> Result<Token<'a>, ParseError> {
@@ -166,14 +166,14 @@ impl<'a> Scanner<'a> {
         self.move_next_char();
       }
     } else {
-      return Err(self.create_error_for_current_char("Expected a digit to follow a negative sign"));
+      return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigitFollowingNegativeSign));
     }
 
     if self.is_decimal_point() {
       self.move_next_char();
 
       if !self.is_digit() {
-        return Err(self.create_error_for_current_char("Expected a digit"));
+        return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigit));
       }
 
       while self.is_digit() {
@@ -187,12 +187,12 @@ impl<'a> Scanner<'a> {
           Some('-') | Some('+') => {
             self.move_next_char();
             if !self.is_digit() {
-              return Err(self.create_error_for_current_char("Expected a digit"));
+              return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigit));
             }
           }
           _ => {
             if !self.is_digit() {
-              return Err(self.create_error_for_current_char("Expected plus, minus, or digit in number literal"));
+              return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedPlusMinusOrDigitInNumberLiteral));
             }
           }
         }
@@ -243,7 +243,7 @@ impl<'a> Scanner<'a> {
       self.assert_then_move_char('/');
       Ok(Token::CommentBlock(&self.file_text[start_byte_index..end_byte_index]))
     } else {
-      Err(self.create_error_for_current_token("Unterminated comment block"))
+      Err(self.create_error_for_current_token(ParseErrorKind::UnterminatedCommentBlock))
     }
   }
 
@@ -292,7 +292,7 @@ impl<'a> Scanner<'a> {
         break;
       }
       if !current_char.is_alphanumeric() && current_char != '-' {
-        return Err(self.create_error_for_current_token("Unexpected token"));
+        return Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken));
       }
 
       self.move_next_char();
@@ -301,7 +301,7 @@ impl<'a> Scanner<'a> {
     let end_byte_index = self.byte_index;
 
     if end_byte_index - start_byte_index == 0 {
-      return Err(self.create_error_for_current_token("Unexpected token"));
+      return Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken));
     }
 
     Ok(Token::Word(&self.file_text[start_byte_index..end_byte_index]))
@@ -426,6 +426,7 @@ mod tests {
 
   use super::super::tokens::Token;
   use super::*;
+  use pretty_assertions::assert_eq;
 
   #[test]
   fn it_tokenizes_string() {
@@ -444,7 +445,7 @@ mod tests {
   fn it_errors_escaping_single_quote_in_double_quote() {
     assert_has_error(
       r#""t\'est""#,
-      "Invalid escape in double quote string on line 1 column 3.",
+      "Invalid escape in double quote string on line 1 column 3",
     );
   }
 
@@ -465,13 +466,13 @@ mod tests {
   fn it_errors_escaping_double_quote_in_single_quote() {
     assert_has_error(
       r#"'t\"est'"#,
-      "Invalid escape in single quote string on line 1 column 3.",
+      "Invalid escape in single quote string on line 1 column 3",
     );
   }
 
   #[test]
   fn it_errors_for_word_starting_with_invalid_token() {
-    assert_has_error(r#"{ &test }"#, "Unexpected token on line 1 column 3.");
+    assert_has_error(r#"{ &test }"#, "Unexpected token on line 1 column 3");
   }
 
   #[test]
@@ -499,9 +500,9 @@ mod tests {
   fn it_errors_invalid_exponent() {
     assert_has_error(
       r#"1ea"#,
-      "Expected plus, minus, or digit in number literal on line 1 column 3.",
+      "Expected plus, minus, or digit in number literal on line 1 column 3",
     );
-    assert_has_error(r#"1e-a"#, "Expected a digit on line 1 column 4.");
+    assert_has_error(r#"1e-a"#, "Expected digit on line 1 column 4");
   }
 
   #[test]
@@ -554,7 +555,7 @@ mod tests {
   fn it_errors_on_invalid_utf8_char_for_issue_6() {
     assert_has_error(
       "\"\\uDF06\"",
-      "Invalid unicode escape sequence. 'DF06' is not a valid UTF8 character on line 1 column 2.",
+      "Invalid unicode escape sequence. 'DF06' is not a valid UTF8 character on line 1 column 2",
     );
   }
 
diff --git a/src/serde.rs b/src/serde.rs
index 422f683..2aa3ac9 100644
--- a/src/serde.rs
+++ b/src/serde.rs
@@ -33,6 +33,7 @@ pub fn parse_to_serde_value(text: &str, parse_options: &ParseOptions) -> Result<
 
 #[cfg(test)]
 mod tests {
+  use pretty_assertions::assert_eq;
   use serde_json::Value as SerdeValue;
   use std::str::FromStr;
 
@@ -42,7 +43,7 @@ mod tests {
   fn it_should_error_when_has_error() {
     assert_has_error(
       "[][]",
-      "Text cannot contain more than one JSON value on line 1 column 3.",
+      "Text cannot contain more than one JSON value on line 1 column 3",
     );
   }
 
diff --git a/src/string.rs b/src/string.rs
index dc2e7f1..d74a241 100644
--- a/src/string.rs
+++ b/src/string.rs
@@ -5,7 +5,7 @@ pub struct ParseStringError {
   pub kind: ParseStringErrorKind,
 }
 
-#[derive(Debug, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum ParseStringErrorKind {
   InvalidEscapeInSingleQuoteString,
   InvalidEscapeInDoubleQuoteString,