Skip to content

Commit

Permalink
String escapes (#434)
Browse files Browse the repository at this point in the history
* Update TokenKind.cs

* Complete
  • Loading branch information
LPeter1997 authored Aug 20, 2024
1 parent 6f9f6e1 commit 9e30dcb
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 22 deletions.
50 changes: 35 additions & 15 deletions src/Draco.Compiler.Tests/Syntax/LexerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ private void AssertValue(object? value)
Assert.NotNull(this.Current.Value);
Assert.Equal(d, (double)this.Current.Value!, 5);
}
else if (value is char ch)
{
Assert.NotNull(this.Current.Value);
Assert.Equal(new Rune(ch), (Rune)this.Current.Value!);
}
else
{
Assert.Equal(value, this.Current.Value);
Expand Down Expand Up @@ -216,10 +221,22 @@ public void TestLineStringEscapes(string ext)
this.AssertNextToken(TokenKind.LineStringStart, $"{ext}\"");
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(
TokenKind.StringContent,
@$"\{ext}""\{ext}\\{ext}n\{ext}'\{ext}u{{1F47D}}\{ext}0",
"\"\\\n'👽\0");
this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}""", '"');
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}\", '\\');
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}n", '\n');
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}'", '\'');
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}u{{1F47D}}", new Rune(0x1F47D));
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}0", '\0');
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.LineStringEnd, $"\"{ext}");
Expand Down Expand Up @@ -247,7 +264,7 @@ public void TestLineStringZeroLengthUnicodeCodepoint(string ext)
Assert.Equal($"{ext}\"", this.Current.Text);
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.StringContent, @$"\{ext}u{{}}", " ");
this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}u{{}}", ' ');
this.AssertNoTrivia();
this.AssertDiagnostics(SyntaxErrors.ZeroLengthUnicodeCodepoint);

Expand Down Expand Up @@ -275,10 +292,13 @@ public void TestLineStringInvalidUnicodeCodepoint(string ext)
this.AssertNoTriviaOrDiagnostics();

//TODO: change this when we get better errors out of invalid unicode codepoints
this.AssertNextToken(TokenKind.StringContent, @$"\{ext}u{{3S}}", " S}");
this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}u{{3", ' ');
this.AssertNoTrivia();
this.AssertDiagnostics(SyntaxErrors.UnclosedUnicodeCodepoint);

this.AssertNextToken(TokenKind.StringContent, "S}", "S}");
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.LineStringEnd, $"\"{ext}");
this.AssertNoTriviaOrDiagnostics();

Expand All @@ -302,7 +322,7 @@ public void TestLineStringUnclosedUnicodeCodepoint(string ext)
this.AssertNextToken(TokenKind.LineStringStart, $"{ext}\"");
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.StringContent, @$"\{ext}u{{", " ");
this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}u{{", ' ');
this.AssertNoTrivia();
this.AssertDiagnostics(SyntaxErrors.UnclosedUnicodeCodepoint);

Expand All @@ -325,10 +345,10 @@ public void TestLineStringMixedEscapes()
this.AssertNextToken(TokenKind.LineStringStart, "##\"");
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(
TokenKind.StringContent,
@"\a\#n\#u{123}\##t",
"\\a\\#n\\#u{123}\t");
this.AssertNextToken(TokenKind.StringContent, @"\a\#n\#u{123}", "\\a\\#n\\#u{123}");
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.EscapeSequence, @"\##t", '\t');
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.LineStringEnd, $"\"##");
Expand Down Expand Up @@ -414,7 +434,7 @@ public void TestIllegalEscapeCharacterInLineString(string ext)
this.AssertNextToken(TokenKind.LineStringStart, $"{ext}\"");
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.StringContent, @$"\{ext}y", " ");
this.AssertNextToken(TokenKind.EscapeSequence, @$"\{ext}y", ' ');
this.AssertNoTrivia();
this.AssertDiagnostics(SyntaxErrors.IllegalEscapeCharacter);

Expand Down Expand Up @@ -998,7 +1018,7 @@ public void TestEndOfInputAfterEscapeSequenceStart(string ext)
this.AssertNextToken(TokenKind.LineStringStart, $"{ext}\"");
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.StringContent, $"\\{ext}", " ");
this.AssertNextToken(TokenKind.EscapeSequence, $"\\{ext}", ' ');
this.AssertNoTrivia();
this.AssertDiagnostics(SyntaxErrors.UnexpectedEscapeSequenceEnd);

Expand All @@ -1023,7 +1043,7 @@ public void TestEndOfInputAfterEscapeSequenceStartAndWhitespace(string ext)
this.AssertNextToken(TokenKind.LineStringStart, $"{ext}\"");
this.AssertNoTriviaOrDiagnostics();

this.AssertNextToken(TokenKind.StringContent, $"\\{ext}{space}", $"{space}");
this.AssertNextToken(TokenKind.EscapeSequence, $"\\{ext}{space}", ' ');
this.AssertNoTrivia();
this.AssertDiagnostics(SyntaxErrors.IllegalEscapeCharacter);

Expand Down Expand Up @@ -1226,7 +1246,7 @@ public void TestUnclosedCharLiteral()
var text = "'a";
this.Lex(text);

this.AssertNextToken(TokenKind.LiteralCharacter, text, new Rune('a'));
this.AssertNextToken(TokenKind.LiteralCharacter, text, 'a');
this.AssertNoTrivia();
this.AssertDiagnostics(SyntaxErrors.UnclosedCharacterLiteral);

Expand Down
15 changes: 14 additions & 1 deletion src/Draco.Compiler.Tests/Syntax/ParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,14 @@ private void StringContent(string content)
}
}

/// <summary>
/// Test helper for a string part made up of a single escape sequence.
/// Calls <c>N&lt;TextStringPartSyntax&gt;()</c> and then <c>TValue</c> with
/// <see cref="TokenKind.EscapeSequence"/> and <paramref name="content"/>.
/// </summary>
/// <param name="content">
/// The value forwarded to <c>TValue</c>; callers in this file pass the decoded
/// text of the escape (for example <c>"\n"</c>), not its source spelling.
/// </param>
private void StringEscape(string content)
{
// NOTE(review): presumably asserts that the next node is a TextStringPartSyntax — confirm against N<T>.
this.N<TextStringPartSyntax>();
// NOTE(review): the bare block looks like a visual marker for "children of the node above" — confirm.
{
// NOTE(review): presumably asserts the next token's kind and value — confirm against TValue.
this.TValue(TokenKind.EscapeSequence, content);
}
}

[Fact]
public void TestEmpty()
{
Expand Down Expand Up @@ -332,7 +340,12 @@ public void TestStringEscapes()
{
this.T(TokenKind.LineStringStart);
this.N<SyntaxList<StringPartSyntax>>();
this.StringContent("Hello, \nWorld! 👽");
{
this.StringContent("Hello, ");
this.StringEscape("\n");
this.StringContent("World! ");
this.StringEscape("👽");
}
this.T(TokenKind.LineStringEnd);
}
}
Expand Down
5 changes: 5 additions & 0 deletions src/Draco.Compiler/Api/Syntax/TokenKind.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ public enum TokenKind
/// </summary>
StringNewline,

/// <summary>
/// An escape sequence in a string.
/// </summary>
EscapeSequence,

/// <summary>
/// The start of string interpolation.
/// </summary>
Expand Down
22 changes: 18 additions & 4 deletions src/Draco.Compiler/Internal/Syntax/Lexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ var _ when ident.Span.SequenceEqual("while") => TokenKind.KeywordWhile,
this.tokenBuilder
.SetKind(TokenKind.LiteralCharacter)
.SetText(errText)
.SetValue(' ');
.SetValue(new Rune(' '));
return default;
}
var resultChar = default(Rune);
Expand Down Expand Up @@ -571,6 +571,17 @@ private Unit LexString()
if (this.Peek(offset + i + 1) != '#') goto not_escape_sequence;
}

if (escapeStart != 0)
{
// This is an escape, but we have content before it
// Return that, the next call will return the escape
this.tokenBuilder
.SetKind(TokenKind.StringContent)
.SetText(this.AdvanceWithText(offset))
.SetValue(this.valueBuilder.ToString());
return default;
}

// Interpolation
if (this.Peek(offset + mode.ExtendedDelims + 1) == '{')
{
Expand Down Expand Up @@ -632,9 +643,12 @@ private Unit LexString()
offset += mode.ExtendedDelims + 1;
// Try to parse an escape
var escaped = this.ParseEscapeSequence(escapeStart, ref offset);
// Append to result
this.valueBuilder.Append(escaped);
goto start;
// Return as the result
this.tokenBuilder
.SetKind(TokenKind.EscapeSequence)
.SetText(this.AdvanceWithText(offset))
.SetValue(escaped);
return default;
}

not_escape_sequence:
Expand Down
4 changes: 2 additions & 2 deletions src/Draco.Compiler/Internal/Syntax/Parser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1058,7 +1058,7 @@ private StringExpressionSyntax ParseLineString()
while (true)
{
var peek = this.Peek();
if (peek == TokenKind.StringContent)
if (peek == TokenKind.StringContent || peek == TokenKind.EscapeSequence)
{
var part = this.Advance();
content.Add(new TextStringPartSyntax(part));
Expand Down Expand Up @@ -1106,7 +1106,7 @@ private StringExpressionSyntax ParseMultiLineString()
while (true)
{
var peek = this.Peek();
if (peek == TokenKind.StringContent || peek == TokenKind.StringNewline)
if (peek == TokenKind.StringContent || peek == TokenKind.StringNewline || peek == TokenKind.EscapeSequence)
{
var part = this.Advance();
content.Add(new TextStringPartSyntax(part));
Expand Down
1 change: 1 addition & 0 deletions src/Draco.Compiler/Internal/Syntax/Syntax.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1179,6 +1179,7 @@

<Token Kind="StringContent" />
<Token Kind="StringNewline" />
<Token Kind="EscapeSequence" />
</Field>
</Node>

Expand Down

0 comments on commit 9e30dcb

Please sign in to comment.