From 9e30dcbdc9eabd418d06f346606066fec50ff288 Mon Sep 17 00:00:00 2001 From: LPeter1997 Date: Tue, 20 Aug 2024 21:17:34 +0200 Subject: [PATCH] String escapes (#434) * Update TokenKind.cs * Complete --- src/Draco.Compiler.Tests/Syntax/LexerTests.cs | 50 +++++++++++++------ .../Syntax/ParserTests.cs | 15 +++++- src/Draco.Compiler/Api/Syntax/TokenKind.cs | 5 ++ src/Draco.Compiler/Internal/Syntax/Lexer.cs | 22 ++++++-- src/Draco.Compiler/Internal/Syntax/Parser.cs | 4 +- src/Draco.Compiler/Internal/Syntax/Syntax.xml | 1 + 6 files changed, 75 insertions(+), 22 deletions(-) diff --git a/src/Draco.Compiler.Tests/Syntax/LexerTests.cs b/src/Draco.Compiler.Tests/Syntax/LexerTests.cs index 3a32ae675..11fdbbad8 100644 --- a/src/Draco.Compiler.Tests/Syntax/LexerTests.cs +++ b/src/Draco.Compiler.Tests/Syntax/LexerTests.cs @@ -66,6 +66,11 @@ private void AssertValue(object? value) Assert.NotNull(this.Current.Value); Assert.Equal(d, (double)this.Current.Value!, 5); } + else if (value is char ch) + { + Assert.NotNull(this.Current.Value); + Assert.Equal(new Rune(ch), (Rune)this.Current.Value!); + } else { Assert.Equal(value, this.Current.Value); @@ -216,10 +221,22 @@ public void TestLineStringEscapes(string ext) this.AssertNextToken(TokenKind.LineStringStart, $"{ext}\""); this.AssertNoTriviaOrDiagnostics(); - this.AssertNextToken( - TokenKind.StringContent, - @$"\{ext}""\{ext}\\{ext}n\{ext}'\{ext}u{{1F47D}}\{ext}0", - "\"\\\n'👽\0"); + this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}""", '"'); + this.AssertNoTriviaOrDiagnostics(); + + this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}\", '\\'); + this.AssertNoTriviaOrDiagnostics(); + + this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}n", '\n'); + this.AssertNoTriviaOrDiagnostics(); + + this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}'", '\''); + this.AssertNoTriviaOrDiagnostics(); + + this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}u{{1F47D}}", new Rune(0x1F47D)); 
this.AssertNoTriviaOrDiagnostics(); + + this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}0", '\0'); this.AssertNoTriviaOrDiagnostics(); this.AssertNextToken(TokenKind.LineStringEnd, $"\"{ext}"); @@ -247,7 +264,7 @@ public void TestLineStringZeroLengthUnicodeCodepoint(string ext) Assert.Equal($"{ext}\"", this.Current.Text); this.AssertNoTriviaOrDiagnostics(); - this.AssertNextToken(TokenKind.StringContent, @$"\{ext}u{{}}", " "); + this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}u{{}}", ' '); this.AssertNoTrivia(); this.AssertDiagnostics(SyntaxErrors.ZeroLengthUnicodeCodepoint); @@ -275,10 +292,13 @@ public void TestLineStringInvalidUnicodeCodepoint(string ext) this.AssertNoTriviaOrDiagnostics(); //TODO: change this when we get better errors out of invalid unicode codepoints - this.AssertNextToken(TokenKind.StringContent, @$"\{ext}u{{3S}}", " S}"); + this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}u{{3", ' '); this.AssertNoTrivia(); this.AssertDiagnostics(SyntaxErrors.UnclosedUnicodeCodepoint); + this.AssertNextToken(TokenKind.StringContent, "S}", "S}"); + this.AssertNoTriviaOrDiagnostics(); + this.AssertNextToken(TokenKind.LineStringEnd, $"\"{ext}"); this.AssertNoTriviaOrDiagnostics(); @@ -302,7 +322,7 @@ public void TestLineStringUnclosedUnicodeCodepoint(string ext) this.AssertNextToken(TokenKind.LineStringStart, $"{ext}\""); this.AssertNoTriviaOrDiagnostics(); - this.AssertNextToken(TokenKind.StringContent, @$"\{ext}u{{", " "); + this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}u{{", ' '); this.AssertNoTrivia(); this.AssertDiagnostics(SyntaxErrors.UnclosedUnicodeCodepoint); @@ -325,10 +345,10 @@ public void TestLineStringMixedEscapes() this.AssertNextToken(TokenKind.LineStringStart, "##\""); this.AssertNoTriviaOrDiagnostics(); - this.AssertNextToken( - TokenKind.StringContent, - @"\a\#n\#u{123}\##t", - "\\a\\#n\\#u{123}\t"); + this.AssertNextToken(TokenKind.StringContent, @"\a\#n\#u{123}", "\\a\\#n\\#u{123}"); + 
this.AssertNoTriviaOrDiagnostics(); + + this.AssertNextToken(TokenKind.EscapeSequence, @"\##t", '\t'); this.AssertNoTriviaOrDiagnostics(); this.AssertNextToken(TokenKind.LineStringEnd, $"\"##"); @@ -414,7 +434,7 @@ public void TestIllegalEscapeCharacterInLineString(string ext) this.AssertNextToken(TokenKind.LineStringStart, $"{ext}\""); this.AssertNoTriviaOrDiagnostics(); - this.AssertNextToken(TokenKind.StringContent, @$"\{ext}y", " "); + this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}y", ' '); this.AssertNoTrivia(); this.AssertDiagnostics(SyntaxErrors.IllegalEscapeCharacter); @@ -998,7 +1018,7 @@ public void TestEndOfInputAfterEscapeSequenceStart(string ext) this.AssertNextToken(TokenKind.LineStringStart, $"{ext}\""); this.AssertNoTriviaOrDiagnostics(); - this.AssertNextToken(TokenKind.StringContent, $"\\{ext}", " "); + this.AssertNextToken(TokenKind.EscapeSequence, $"\\{ext}", ' '); this.AssertNoTrivia(); this.AssertDiagnostics(SyntaxErrors.UnexpectedEscapeSequenceEnd); @@ -1023,7 +1043,7 @@ public void TestEndOfInputAfterEscapeSequenceStartAndWhitespace(string ext) this.AssertNextToken(TokenKind.LineStringStart, $"{ext}\""); this.AssertNoTriviaOrDiagnostics(); - this.AssertNextToken(TokenKind.StringContent, $"\\{ext}{space}", $"{space}"); + this.AssertNextToken(TokenKind.EscapeSequence, $"\\{ext}{space}", ' '); this.AssertNoTrivia(); this.AssertDiagnostics(SyntaxErrors.IllegalEscapeCharacter); @@ -1226,7 +1246,7 @@ public void TestUnclosedCharLiteral() var text = "'a"; this.Lex(text); - this.AssertNextToken(TokenKind.LiteralCharacter, text, new Rune('a')); + this.AssertNextToken(TokenKind.LiteralCharacter, text, 'a'); this.AssertNoTrivia(); this.AssertDiagnostics(SyntaxErrors.UnclosedCharacterLiteral); diff --git a/src/Draco.Compiler.Tests/Syntax/ParserTests.cs b/src/Draco.Compiler.Tests/Syntax/ParserTests.cs index 9ab912636..281fb9ce2 100644 --- a/src/Draco.Compiler.Tests/Syntax/ParserTests.cs +++ b/src/Draco.Compiler.Tests/Syntax/ParserTests.cs @@ 
-114,6 +114,14 @@ private void StringContent(string content) } } + private void StringEscape(string content) + { + this.N(); + { + this.TValue(TokenKind.EscapeSequence, content); + } + } + [Fact] public void TestEmpty() { @@ -332,7 +340,12 @@ public void TestStringEscapes() { this.T(TokenKind.LineStringStart); this.N>(); - this.StringContent("Hello, \nWorld! 👽"); + { + this.StringContent("Hello, "); + this.StringEscape("\n"); + this.StringContent("World! "); + this.StringEscape("👽"); + } this.T(TokenKind.LineStringEnd); } } diff --git a/src/Draco.Compiler/Api/Syntax/TokenKind.cs b/src/Draco.Compiler/Api/Syntax/TokenKind.cs index 9c6c533e5..28316a6d2 100644 --- a/src/Draco.Compiler/Api/Syntax/TokenKind.cs +++ b/src/Draco.Compiler/Api/Syntax/TokenKind.cs @@ -65,6 +65,11 @@ public enum TokenKind /// StringNewline, + /// + /// An escape sequence in a string. + /// + EscapeSequence, + /// /// The start of string interpolation. /// diff --git a/src/Draco.Compiler/Internal/Syntax/Lexer.cs b/src/Draco.Compiler/Internal/Syntax/Lexer.cs index 33423244f..d2bc20be9 100644 --- a/src/Draco.Compiler/Internal/Syntax/Lexer.cs +++ b/src/Draco.Compiler/Internal/Syntax/Lexer.cs @@ -382,7 +382,7 @@ var _ when ident.Span.SequenceEqual("while") => TokenKind.KeywordWhile, this.tokenBuilder .SetKind(TokenKind.LiteralCharacter) .SetText(errText) - .SetValue(' '); + .SetValue(new Rune(' ')); return default; } var resultChar = default(Rune); @@ -571,6 +571,17 @@ private Unit LexString() if (this.Peek(offset + i + 1) != '#') goto not_escape_sequence; } + if (escapeStart != 0) + { + // This is an escape, but we have content before it + // Return that, the next call will return the escape + this.tokenBuilder + .SetKind(TokenKind.StringContent) + .SetText(this.AdvanceWithText(offset)) + .SetValue(this.valueBuilder.ToString()); + return default; + } + // Interpolation if (this.Peek(offset + mode.ExtendedDelims + 1) == '{') { @@ -632,9 +643,12 @@ private Unit LexString() offset += 
mode.ExtendedDelims + 1; // Try to parse an escape var escaped = this.ParseEscapeSequence(escapeStart, ref offset); - // Append to result - this.valueBuilder.Append(escaped); - goto start; + // Return as the result + this.tokenBuilder + .SetKind(TokenKind.EscapeSequence) + .SetText(this.AdvanceWithText(offset)) + .SetValue(escaped); + return default; } not_escape_sequence: diff --git a/src/Draco.Compiler/Internal/Syntax/Parser.cs b/src/Draco.Compiler/Internal/Syntax/Parser.cs index e16032f7e..d55352926 100644 --- a/src/Draco.Compiler/Internal/Syntax/Parser.cs +++ b/src/Draco.Compiler/Internal/Syntax/Parser.cs @@ -1058,7 +1058,7 @@ private StringExpressionSyntax ParseLineString() while (true) { var peek = this.Peek(); - if (peek == TokenKind.StringContent) + if (peek == TokenKind.StringContent || peek == TokenKind.EscapeSequence) { var part = this.Advance(); content.Add(new TextStringPartSyntax(part)); @@ -1106,7 +1106,7 @@ private StringExpressionSyntax ParseMultiLineString() while (true) { var peek = this.Peek(); - if (peek == TokenKind.StringContent || peek == TokenKind.StringNewline) + if (peek == TokenKind.StringContent || peek == TokenKind.StringNewline || peek == TokenKind.EscapeSequence) { var part = this.Advance(); content.Add(new TextStringPartSyntax(part)); diff --git a/src/Draco.Compiler/Internal/Syntax/Syntax.xml b/src/Draco.Compiler/Internal/Syntax/Syntax.xml index 0c4538da6..8b5e3a9c2 100644 --- a/src/Draco.Compiler/Internal/Syntax/Syntax.xml +++ b/src/Draco.Compiler/Internal/Syntax/Syntax.xml @@ -1179,6 +1179,7 @@ +