Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

libnixf: use sync tokens for creating unknown nodes (error recovery) #316

Merged
merged 3 commits into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libnixf/include/nixf/Basic/DiagnosticKinds.inc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ DIAG("lex-float-leading-zero", FloatLeadingZero, Warning,
DIAG("parse-expected", Expected, Error, "expected {}")
DIAG("parse-attrpath-extra-dot", AttrPathExtraDot, Error, "extra `.` at the end of attrpath")
DIAG("parse-unexpected-between", UnexpectedBetween, Error, "unexpected {} between {} and {}")
DIAG("parse-unexpected", UnexpectedText, Error, "unexpected text {}")
DIAG("parse-unexpected", UnexpectedText, Error, "unexpected text")
DIAG("parse-missing-sep-formals", MissingSepFormals, Error, "missing seperator `,` between two lambda formals")
DIAG("let-dynamic", LetDynamic, Error,
"dynamic attributes are not allowed in let ... in ... expression")
Expand Down
4 changes: 4 additions & 0 deletions libnixf/include/nixf/Basic/Range.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class Point {
}
Point() = default;

// Point is a plain value type: every copy/move operation is the
// compiler-generated one. All four are spelled out together; once the copy
// operations and the move constructor are user-declared, move assignment is
// no longer declared implicitly and rvalue assignment would otherwise fall
// back to copy assignment.
Point(const Point &) = default;
Point &operator=(const Point &) = default;
Point(Point &&) = default;
Point &operator=(Point &&) = default;

/// \brief Check if the point is at the given position.
[[nodiscard]] bool isAt(int64_t Line, int64_t Column,
std::size_t Offset) const {
Expand Down
208 changes: 128 additions & 80 deletions libnixf/src/Parse/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <charconv>
#include <deque>
#include <memory>
#include <set>
#include <stack>
#include <string>
#include <string_view>
Expand Down Expand Up @@ -49,6 +50,18 @@ class Parser {
std::optional<Token> LastToken;
std::stack<ParserState> State;

/// \brief Sync tokens for error recovery.
///
/// These tokens are treated as the end of an "unknown" node.
/// We create an "unknown" node to recover from "extra" token errors.
/// (Also, this node is invisible in the AST)
///
/// e.g. { foo....bar = ; }
/// ^~~ remove these tokens
///
/// Sync tokens will not be eaten as "unknown".
std::multiset<TokenKind> SyncTokens;

class StateRAII {
Parser &P;

Expand Down Expand Up @@ -106,6 +119,85 @@ class Parser {
return LookAheadBuf[N];
}

/// \brief Consume tokens until the next sync token.
/// \returns The consumed range. If no token is consumed, return nullopt.
std::optional<RangeTy> consumeAsUnknown() {
Point Begin = peek().begin();
bool Consumed = false;
for (Token Tok = peek(); Tok.kind() != tok_eof; Tok = peek()) {
if (SyncTokens.contains(Tok.kind()))
break;
Consumed = true;
consume();
}
if (!Consumed)
return std::nullopt;
assert(LastToken && "LastToken should be set after consume()");
return RangeTy{Begin, LastToken->end()};
}

/// \brief Scoped registration of a sync token.
///
/// Construction adds one occurrence of \p Kind to Parser::SyncTokens;
/// destruction removes exactly one occurrence again. Registrations of the
/// same kind may nest, and an inner guard going out of scope must leave the
/// outer registration in place.
class SyncRAII {
  Parser &P;
  TokenKind Kind;

public:
  SyncRAII(Parser &P, TokenKind Kind) : P(P), Kind(Kind) {
    P.SyncTokens.emplace(Kind);
  }

  // A copy would unregister a second time and steal another scope's
  // registration, so guards are not copyable.
  SyncRAII(const SyncRAII &) = delete;
  SyncRAII &operator=(const SyncRAII &) = delete;

  ~SyncRAII() {
    // multiset::erase(key) would drop *every* equal element, including the
    // ones registered by enclosing scopes. Erase a single element through an
    // iterator instead.
    if (auto It = P.SyncTokens.find(Kind); It != P.SyncTokens.end())
      P.SyncTokens.erase(It);
  }
};

/// \brief Register \p Kind as a sync token for as long as the returned guard
/// object is alive.
SyncRAII withSync(TokenKind Kind) { return SyncRAII(*this, Kind); }

class ExpectResult {
bool Success;
std::optional<Token> Tok;
Diagnostic *DiagMissing;

public:
ExpectResult(Token Tok) : Success(true), Tok(Tok), DiagMissing(nullptr) {}
ExpectResult(Diagnostic *DiagMissing)
: Success(false), DiagMissing(DiagMissing) {}

[[nodiscard]] bool ok() const { return Success; }
[[nodiscard]] Token tok() const {
assert(Tok);
return *Tok;
}
[[nodiscard]] Diagnostic &diag() const {
assert(DiagMissing);
return *DiagMissing;
}
};

/// \brief Require the next token to be of kind \p Kind, recovering if not.
///
/// Recovery happens in up to two steps:
///  1. Tokens in front of the next sync token are skipped and reported as
///     DK_UnexpectedText, with a fix-it that removes them.
///  2. If the expected token is still not next, DK_Expected is reported with
///     a fix-it that inserts its spelling.
///
/// The expected token itself is never consumed here.
/// \returns The matching token on success, otherwise the DK_Expected
/// diagnostic that was reported.
ExpectResult expect(TokenKind Kind) {
  // While this call is active, Kind itself acts as a sync token, so the
  // skipping below stops right in front of it.
  auto Guard = withSync(Kind);

  Token Next = peek();
  if (Next.kind() == Kind)
    return Next;

  // UNKNOWN ?
  // ~~~~~~~ consider remove unexpected text
  std::optional<RangeTy> Skipped = consumeAsUnknown();
  if (Skipped) {
    Diagnostic &Unexpected =
        Diags.emplace_back(Diagnostic::DK_UnexpectedText, *Skipped);
    Unexpected.fix("remove unexpected text")
        .edit(TextEdit::mkRemoval(*Skipped));

    Token AfterSkip = peek();
    if (AfterSkip.kind() == Kind)
      return AfterSkip;
    // Still not the expected token: fall through and report it as missing
    // as well (two errors in total).
  }

  // expected Kind
  auto spell = [Kind] { return std::string(tok::spelling(Kind)); };
  Point InsertAt = LastToken ? LastToken->end() : peek().begin();
  Diagnostic &Expected =
      Diags.emplace_back(Diagnostic::DK_Expected, RangeTy(InsertAt));
  Expected << spell();
  Expected.fix("insert " + spell())
      .edit(TextEdit::mkInsertion(InsertAt, spell()));
  return ExpectResult(&Expected);
}

void consume() {
if (LookAheadBuf.empty())
peek(0);
Expand All @@ -131,22 +223,18 @@ class Parser {
Token TokDollarCurly = peek();
assert(TokDollarCurly.kind() == tok_dollar_curly);
consume(); // ${
auto Sync = withSync(tok_r_curly);
assert(LastToken);
/* with(PS_Expr) */ {
auto ExprState = withState(PS_Expr);
auto Expr = parseExpr();
if (!Expr)
diagNullExpr(Diags, LastToken->end(), "interpolation");
if (peek().kind() == tok_r_curly) {
if (ExpectResult ER = expect(tok_r_curly); ER.ok()) {
consume(); // }
} else {
// expected "}" for interpolation
Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
RangeTy(LastToken->end()));
D << std::string(tok::spelling(tok_r_curly));
D.note(Note::NK_ToMachThis, TokDollarCurly.range())
ER.diag().note(Note::NK_ToMachThis, TokDollarCurly.range())
<< std::string(tok::spelling(tok_dollar_curly));
D.fix("insert }").edit(TextEdit::mkInsertion(LastToken->end(), "}"));
}
return Expr;
} // with(PS_Expr)
Expand Down Expand Up @@ -234,31 +322,24 @@ class Parser {
assert(Quote.kind() == QuoteKind && "should be a quote");
// Consume the quote and so make the look-ahead buf empty.
consume();
auto Sync = withSync(QuoteKind);
assert(LastToken && "LastToken should be set after consume()");
/* with(PS_String / PS_IndString) */ {
auto StringState = withState(IsIndented ? PS_IndString : PS_String);
std::shared_ptr<InterpolatedParts> Parts = parseStringParts();
if (Token EndTok = peek(); EndTok.kind() == QuoteKind) {
if (ExpectResult ER = expect(QuoteKind); ER.ok()) {
consume();
return std::make_shared<ExprString>(
RangeTy{Quote.begin(), ER.tok().end()}, std::move(Parts));
} else { // NOLINT(readability-else-after-return)
ER.diag().note(Note::NK_ToMachThis, Quote.range()) << QuoteSpel;
return std::make_shared<ExprString>(
RangeTy{
Quote.begin(),
EndTok.end(),
Parts->end(),
},
std::move(Parts));
}
Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
RangeTy(LastToken->end()));
D << QuoteSpel;
D.note(Note::NK_ToMachThis, Quote.range()) << QuoteSpel;
D.fix("insert " + QuoteSpel)
.edit(TextEdit::mkInsertion(LastToken->end(), QuoteSpel));
return std::make_shared<ExprString>(
RangeTy{
Quote.begin(),
Parts->end(),
},
std::move(Parts));

} // with(PS_String / PS_IndString)
}
Expand All @@ -269,36 +350,24 @@ class Parser {
auto LParen = std::make_shared<Misc>(L.range());
assert(L.kind() == tok_l_paren);
consume(); // (
auto Sync = withSync(tok_r_paren);
assert(LastToken && "LastToken should be set after consume()");
auto Expr = parseExpr();
if (!Expr)
diagNullExpr(Diags, LastToken->end(), "parenthesized");
if (Token R = peek(); R.kind() == tok_r_paren) {
if (ExpectResult ER = expect(tok_r_paren); ER.ok()) {
consume(); // )
auto RParen = std::make_shared<Misc>(R.range());
return std::make_shared<ExprParen>(
RangeTy{
L.begin(),
R.end(),
},
std::move(Expr), std::move(LParen), std::move(RParen));
auto RParen = std::make_shared<Misc>(ER.tok().range());
return std::make_shared<ExprParen>(RangeTy{L.begin(), ER.tok().end()},
std::move(Expr), std::move(LParen),
std::move(RParen));
} else { // NOLINT(readability-else-after-return)
ER.diag().note(Note::NK_ToMachThis, L.range())
<< std::string(tok::spelling(tok_l_paren));
return std::make_shared<ExprParen>(RangeTy{L.begin(), LastToken->end()},
std::move(Expr), std::move(LParen),
/*RParen=*/nullptr);
}

// Missing ")"
Diagnostic &D =
Diags.emplace_back(Diagnostic::DK_Expected, RangeTy(LastToken->end()));
D << std::string(tok::spelling(tok_r_paren));
D.note(Note::NK_ToMachThis, L.range())
<< std::string(tok::spelling(tok_l_paren));
D.fix("insert )")
.edit(TextEdit::mkInsertion(LastToken->end(),
std::string(tok::spelling(tok_r_paren))));
return std::make_shared<ExprParen>(
RangeTy{
L.begin(),
LastToken->end(),
},
std::move(Expr), std::move(LParen), /*RParen=*/nullptr);
}

// attrname : ID
Expand Down Expand Up @@ -354,12 +423,8 @@ class Parser {
}
break;
}
return std::make_shared<AttrPath>(
RangeTy{
Begin,
LastToken->end(),
},
std::move(AttrNames));
return std::make_shared<AttrPath>(RangeTy{Begin, LastToken->end()},
std::move(AttrNames));
}

// binding : attrpath '=' expr ';'
Expand All @@ -368,15 +433,10 @@ class Parser {
if (!Path)
return nullptr;
assert(LastToken && "LastToken should be set after valid attrpath");
if (Token Tok = peek(); Tok.kind() == tok_eq) {
auto SyncEq = withSync(tok_eq);
auto SyncSemi = withSync(tok_semi_colon);
if (ExpectResult ER = expect(tok_eq); ER.ok())
consume();
} else {
// expected "=" for binding
Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
RangeTy(LastToken->end()));
D << std::string(tok::spelling(tok_eq));
D.fix("insert =").edit(TextEdit::mkInsertion(LastToken->end(), "="));
}
auto Expr = parseExpr();
if (!Expr)
diagNullExpr(Diags, LastToken->end(), "binding");
Expand All @@ -391,12 +451,8 @@ class Parser {
D << std::string(tok::spelling(tok_semi_colon));
D.fix("insert ;").edit(TextEdit::mkInsertion(LastToken->end(), ";"));
}
return std::make_shared<Binding>(
RangeTy{
Path->begin(),
LastToken->end(),
},
std::move(Path), std::move(Expr));
return std::make_shared<Binding>(RangeTy{Path->begin(), LastToken->end()},
std::move(Path), std::move(Expr));
}

// binds : ( binding | inherit )*
Expand All @@ -416,12 +472,8 @@ class Parser {
}
break;
}
return std::make_shared<Binds>(
RangeTy{
Begin,
LastToken->end(),
},
std::move(Bindings));
return std::make_shared<Binds>(RangeTy{Begin, LastToken->end()},
std::move(Bindings));
}

// attrset_expr : REC? '{' binds '}'
Expand All @@ -430,6 +482,8 @@ class Parser {
std::shared_ptr<ExprAttrs> parseExprAttrs() {
std::shared_ptr<Misc> Rec;

auto Sync = withSync(tok_r_curly);

// "to match this ..."
// if "{" is missing, then use "rec", otherwise use "{"
Token Matcher = peek();
Expand All @@ -452,17 +506,11 @@ class Parser {
}
assert(LastToken && "LastToken should be set after valid { or rec");
auto Binds = parseBinds();
if (Token Tok = peek(); Tok.kind() == tok_r_curly) {
if (ExpectResult ER = expect(tok_r_curly); ER.ok())
consume();
} else {
// expected "}" for attrset
Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
RangeTy(LastToken->range()));
D << std::string(tok::spelling(tok_r_curly));
D.note(Note::NK_ToMachThis, Matcher.range())
else
ER.diag().note(Note::NK_ToMachThis, Matcher.range())
<< std::string(tok::spelling(Matcher.kind()));
D.fix("insert }").edit(TextEdit::mkInsertion(LastToken->end(), "}"));
}
return std::make_shared<ExprAttrs>(RangeTy{Begin, LastToken->end()},
std::move(Binds), std::move(Rec));
}
Expand Down
33 changes: 33 additions & 0 deletions libnixf/test/Parse/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,4 +560,37 @@ TEST(Parser, ExprVar) {
ASSERT_EQ(Diags.size(), 0);
}

// Error recovery inside an attribute set: the parser must still produce an
// AST and report exactly one "unexpected text" diagnostic whose only fix-it
// removes the offending range.
//
// NOTE(review): the asserted line/column/offset values depend on the exact
// whitespace inside the raw string literal below -- confirm the literal's
// indentation matches those positions.
TEST(Parser, SyncAttrs) {
auto Src = R"(
rec {
)))
a asd = 1;
}
)"sv;

std::vector<Diagnostic> Diags;
auto AST = nixf::parse(Src, Diags);

// Parsing must yield a tree even though the input is malformed.
ASSERT_TRUE(AST);

// A single diagnostic of kind DK_UnexpectedText, without format arguments.
ASSERT_EQ(Diags.size(), 1);
const auto &D = Diags[0];
ASSERT_TRUE(D.range().begin().isAt(2, 2, 9));
ASSERT_TRUE(D.range().end().isAt(3, 13, 26));
ASSERT_EQ(D.kind(), Diagnostic::DK_UnexpectedText);
ASSERT_EQ(D.args().size(), 0);

// Check the note.
ASSERT_EQ(D.notes().size(), 0);

// Check fix-it hints.
// One fix with one edit: replace the diagnostic's range with empty text.
ASSERT_EQ(D.fixes().size(), 1);
ASSERT_EQ(D.fixes()[0].edits().size(), 1);
ASSERT_EQ(D.fixes()[0].message(), "remove unexpected text");
const auto &F = D.fixes()[0].edits()[0];
ASSERT_TRUE(F.oldRange().begin().isAt(2, 2, 9));
ASSERT_TRUE(F.oldRange().end().isAt(3, 13, 26));
ASSERT_EQ(F.newText(), "");
}

} // namespace