diff --git a/src/Draco.Compiler.Tests/Draco.Compiler.Tests.csproj b/src/Draco.Compiler.Tests/Draco.Compiler.Tests.csproj index 058508eed..ffa0b58a6 100644 --- a/src/Draco.Compiler.Tests/Draco.Compiler.Tests.csproj +++ b/src/Draco.Compiler.Tests/Draco.Compiler.Tests.csproj @@ -26,4 +26,10 @@ + + + + PreserveNewest + + diff --git a/src/Draco.Compiler.Tests/Syntax/FuzzerTestCases/crash-16a56ef2ee1f6a2fcc8ddf2911e0f3fb1b36e3b9.draco b/src/Draco.Compiler.Tests/Syntax/FuzzerTestCases/crash-16a56ef2ee1f6a2fcc8ddf2911e0f3fb1b36e3b9.draco new file mode 100644 index 000000000..166595935 --- /dev/null +++ b/src/Draco.Compiler.Tests/Syntax/FuzzerTestCases/crash-16a56ef2ee1f6a2fcc8ddf2911e0f3fb1b36e3b9.draco @@ -0,0 +1 @@ +"\u{333333} \ No newline at end of file diff --git a/src/Draco.Compiler.Tests/Syntax/FuzzerTestCases/crash-81bbb6ad94b3f4802afcaab9ef895fc0ddef70d4.draco b/src/Draco.Compiler.Tests/Syntax/FuzzerTestCases/crash-81bbb6ad94b3f4802afcaab9ef895fc0ddef70d4.draco new file mode 100644 index 000000000..d4817db59 --- /dev/null +++ b/src/Draco.Compiler.Tests/Syntax/FuzzerTestCases/crash-81bbb6ad94b3f4802afcaab9ef895fc0ddef70d4.draco @@ -0,0 +1 @@ +'󉉉��������������������������������������� diff --git a/src/Draco.Compiler.Tests/Syntax/FuzzerTestCases/timeout-0faaf5cf37107e7f0ff88777816a4b98f5fc3c63.draco b/src/Draco.Compiler.Tests/Syntax/FuzzerTestCases/timeout-0faaf5cf37107e7f0ff88777816a4b98f5fc3c63.draco new file mode 100644 index 000000000..a84e0db90 --- /dev/null +++ b/src/Draco.Compiler.Tests/Syntax/FuzzerTestCases/timeout-0faaf5cf37107e7f0ff88777816a4b98f5fc3c63.draco @@ -0,0 +1 @@ +߄ diff --git a/src/Draco.Compiler.Tests/Syntax/LexerTests.cs b/src/Draco.Compiler.Tests/Syntax/LexerTests.cs index c2a456f02..3a32ae675 100644 --- a/src/Draco.Compiler.Tests/Syntax/LexerTests.cs +++ b/src/Draco.Compiler.Tests/Syntax/LexerTests.cs @@ -1,3 +1,4 @@ +using System.Diagnostics; using System.Text; using Draco.Compiler.Api.Diagnostics; using Draco.Compiler.Api.Syntax; @@ 
-1277,4 +1278,25 @@ public void TestHelloWorld() this.AssertNextToken(TokenKind.Semicolon, ";"); this.AssertNextToken(TokenKind.EndOfInput); } + + [Theory] + [InlineData("timeout-0faaf5cf37107e7f0ff88777816a4b98f5fc3c63.draco")] + [InlineData("crash-16a56ef2ee1f6a2fcc8ddf2911e0f3fb1b36e3b9.draco")] + [InlineData("crash-81bbb6ad94b3f4802afcaab9ef895fc0ddef70d4.draco")] + public void TestFuzzerSampleTimeout(string path) + { + var text = File.ReadAllText(Path.Combine("Syntax", "FuzzerTestCases", path), Encoding.UTF8); + var source = SourceReader.From(text); + var lexer = new Lexer(source, this.diagnostics); + var stopwatch = Stopwatch.StartNew(); + while (true) + { + var token = lexer.Lex(); + if (token.Kind == TokenKind.EndOfInput) break; + if (stopwatch.ElapsedMilliseconds > 5000 && !Debugger.IsAttached) + { + Assert.Fail("Timeout."); + } + } + } } diff --git a/src/Draco.Compiler/Internal/Syntax/Lexer.cs b/src/Draco.Compiler/Internal/Syntax/Lexer.cs index 55af5e931..5f6ba98ac 100644 --- a/src/Draco.Compiler/Internal/Syntax/Lexer.cs +++ b/src/Draco.Compiler/Internal/Syntax/Lexer.cs @@ -244,7 +244,7 @@ Unit TakeWithText(TokenKind tokenKind, int length) // NOTE: We check for numeric literals first, so we can be lazy with the identifier checking later // Since digits would be a valid identifier character, we can avoid separating the check for the // first character - if (char.IsDigit(ch)) + if (IsDigit(ch)) { // Check for what kind of integer do we have var radix = this.Peek(1); @@ -261,14 +261,14 @@ Unit TakeWithText(TokenKind tokenKind, int length) } var offset = 1; var isFloat = false; - while (char.IsDigit(this.Peek(offset))) ++offset; + while (IsDigit(this.Peek(offset))) ++offset; // Floating point number - if (this.Peek(offset) == '.' && char.IsDigit(this.Peek(offset + 1))) + if (this.Peek(offset) == '.' 
&& IsDigit(this.Peek(offset + 1))) { isFloat = true; offset += 2; - while (char.IsDigit(this.Peek(offset))) ++offset; + while (IsDigit(this.Peek(offset))) ++offset; } if (char.ToLower(this.Peek(offset)) == 'e') @@ -276,7 +276,7 @@ Unit TakeWithText(TokenKind tokenKind, int length) isFloat = true; ++offset; if (this.Peek(offset) == '+' || this.Peek(offset) == '-') ++offset; - if (!char.IsDigit(this.Peek(offset))) + if (!IsDigit(this.Peek(offset))) { this.AddError( template: SyntaxErrors.UnexpectedFloatingPointLiteralEnd, @@ -287,7 +287,7 @@ Unit TakeWithText(TokenKind tokenKind, int length) .SetText(this.Advance(offset).Span.ToString()); return default; } - while (char.IsDigit(this.Peek(offset))) ++offset; + while (IsDigit(this.Peek(offset))) ++offset; } if (isFloat) @@ -396,7 +396,20 @@ var _ when ident.Span.SequenceEqual("while") => TokenKind.KeywordWhile, { // Regular character ++offset; - resultChar = new(ch2); + if (Rune.IsValid(ch2)) + { + resultChar = new(ch2); + } + else + { + // UnexpectedInput + this.AddError( + SyntaxErrors.IllegalCharacterLiteral, + offset: offset, + width: 1, + args: (int)ch2); + resultChar = DefaultCharacter; + } } else { @@ -738,6 +751,15 @@ private Rune ParseEscapeSequence(int escapeStart, ref int offset) ++offset; if (length > 0) { + if (!Rune.IsValid(unicodeValue)) + { + this.AddError( + SyntaxErrors.IllegalUnicodeCodepoint, + offset: escapeStart, + width: offset - escapeStart, + unicodeValue); + return DefaultCharacter; + } return new(unicodeValue); } else @@ -931,6 +953,7 @@ private bool TryPeek(int offset, out char result) => private static bool IsIdent(char ch) => char.IsLetterOrDigit(ch) || ch == '_'; private static bool IsSpace(char ch) => char.IsWhiteSpace(ch) && !IsNewline(ch); private static bool IsNewline(char ch) => ch == '\r' || ch == '\n'; + private static bool IsDigit(char ch) => ch >= '0' && ch <= '9'; private static bool TryParseHexDigit(char ch, out int value) { if (ch >= '0' && ch <= '9') diff --git 
a/src/Draco.Compiler/Internal/Syntax/SyntaxErrors.cs b/src/Draco.Compiler/Internal/Syntax/SyntaxErrors.cs index 31098fce7..d8e6c1195 100644 --- a/src/Draco.Compiler/Internal/Syntax/SyntaxErrors.cs +++ b/src/Draco.Compiler/Internal/Syntax/SyntaxErrors.cs @@ -46,6 +46,15 @@ internal static class SyntaxErrors format: "unclosed unicode codepoint escape sequence", code: Code(4)); + /// + /// A \u{...} construct that represents an invalid codepoint. + /// + public static readonly DiagnosticTemplate IllegalUnicodeCodepoint = DiagnosticTemplate.Create( + title: "illegal unicode codepoint", + severity: DiagnosticSeverity.Error, + format: "illegal unicode codepoint (code: {0})", + code: Code(5)); + /// /// An illegal escape character after '\'. /// @@ -53,7 +62,7 @@ internal static class SyntaxErrors title: "illegal escape character", severity: DiagnosticSeverity.Error, format: "illegal escape character '{0}'", - code: Code(5)); + code: Code(6)); /// /// A certain kind of token was expected while parsing. @@ -62,7 +71,7 @@ internal static class SyntaxErrors title: "expected token", severity: DiagnosticSeverity.Error, format: "expected token {0}", - code: Code(6)); + code: Code(7)); /// /// Some kind of unexpected input while parsing. @@ -71,7 +80,7 @@ internal static class SyntaxErrors title: "unexpected input", severity: DiagnosticSeverity.Error, format: "unexpected input while parsing {0}", - code: Code(7)); + code: Code(8)); /// /// Insufficient indentation in a multiline string. @@ -80,7 +89,7 @@ internal static class SyntaxErrors title: "insufficient indentation", severity: DiagnosticSeverity.Error, format: "insufficient indentation in multiline string", - code: Code(8)); + code: Code(9)); /// /// There are extra tokens inline with the opening quotes of a multiline string. 
@@ -89,7 +98,7 @@ internal static class SyntaxErrors title: "illegal tokens", severity: DiagnosticSeverity.Error, format: "illegal tokens inline with opening quotes of multiline string", - code: Code(9)); + code: Code(10)); /// /// The closing quotes of a multiline string are not on a new line. @@ -98,7 +107,7 @@ internal static class SyntaxErrors title: "closing quotes are not on a new line", severity: DiagnosticSeverity.Error, format: "closing quotes are not on a new line of multiline string", - code: Code(10)); + code: Code(11)); /// /// The literal ended unexpectedly. @@ -107,7 +116,7 @@ internal static class SyntaxErrors title: "unexpected floating-point literal end", severity: DiagnosticSeverity.Error, format: "unexpected end of scientific notation floating-point literal, expected one or more digits after exponent", - code: Code(11)); + code: Code(12)); /// /// The character literal ended unexpectedly. @@ -116,7 +125,7 @@ internal static class SyntaxErrors title: "unexpected character literal end", severity: DiagnosticSeverity.Error, format: "unexpected end of character literal", - code: Code(12)); + code: Code(13)); /// /// The escape sequence ended unexpectedly. @@ -125,7 +134,7 @@ internal static class SyntaxErrors title: "unexpected escape sequence end", severity: DiagnosticSeverity.Error, format: "unexpected end of escape sequence", - code: Code(13)); + code: Code(14)); /// /// An illegal language element in the context. @@ -134,7 +143,7 @@ internal static class SyntaxErrors title: "illegal element in context", severity: DiagnosticSeverity.Error, format: "illegal language element {0} in context", - code: Code(14)); + code: Code(15)); /// /// There is a visibility modifier before an import statement. @@ -143,5 +152,5 @@ internal static class SyntaxErrors title: "unexpected visibility modifier before import statement", severity: DiagnosticSeverity.Error, format: "unexpected {0} before import statement", - code: Code(15)); + code: Code(16)); }