Skip to content

Commit

Permalink
libnixf: use sync tokens for creating unknown nodes (error recovery)
Browse files Browse the repository at this point in the history
  • Loading branch information
inclyc committed Jan 23, 2024
1 parent fbdbeed commit a6dd59c
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 85 deletions.
2 changes: 1 addition & 1 deletion libnixf/include/nixf/Basic/DiagnosticKinds.inc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ DIAG("lex-float-leading-zero", FloatLeadingZero, Warning,
DIAG("parse-expected", Expected, Error, "expected {}")
DIAG("parse-attrpath-extra-dot", AttrPathExtraDot, Error, "extra `.` at the end of attrpath")
DIAG("parse-unexpected-between", UnexpectedBetween, Error, "unexpected {} between {} and {}")
DIAG("parse-unexpected", UnexpectedText, Error, "unexpected text {}")
DIAG("parse-unexpected", UnexpectedText, Error, "unexpected text")
DIAG("parse-missing-sep-formals", MissingSepFormals, Error, "missing seperator `,` between two lambda formals")
DIAG("let-dynamic", LetDynamic, Error,
"dynamic attributes are not allowed in let ... in ... expression")
Expand Down
4 changes: 4 additions & 0 deletions libnixf/include/nixf/Basic/Range.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class Point {
}
Point() = default;

// Copy and move operations are all explicitly defaulted. Declaring the copy
// operations and the move constructor without the move assignment operator
// leaves the set incomplete (rule of five), so it is spelled out as well.
Point(const Point &) = default;
Point &operator=(const Point &) = default;
Point(Point &&) = default;
Point &operator=(Point &&) = default;

/// \brief Check if the point is at the given position.
[[nodiscard]] bool isAt(int64_t Line, int64_t Column,
std::size_t Offset) const {
Expand Down
213 changes: 129 additions & 84 deletions libnixf/src/Parse/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <charconv>
#include <deque>
#include <memory>
#include <set>
#include <stack>
#include <string>
#include <string_view>
Expand Down Expand Up @@ -49,6 +50,18 @@ class Parser {
std::optional<Token> LastToken;
std::stack<ParserState> State;

/// \brief Sync tokens for error recovery.
///
/// These tokens will be considered as the end of an "unknown" node.
/// We create an "unknown" node to recover from "extra" token errors.
/// (Also, this node is invisible in the AST)
///
/// e.g. { foo....bar = ; }
/// ^~~ remove these tokens
///
/// Sync tokens will not be eaten as "unknown".
std::multiset<TokenKind> SyncTokens;

class StateRAII {
Parser &P;

Expand Down Expand Up @@ -106,6 +119,88 @@ class Parser {
return LookAheadBuf[N];
}

/// \brief Consume tokens until the next sync token (or end of file).
///
/// A token whose kind is currently present in SyncTokens stops the scan and
/// is left unconsumed, as is tok_eof.
///
/// \returns The consumed range. If no token is consumed, return nullopt.
std::optional<RangeTy> consumeAsUnknown() {
  Point Begin = peek().begin();
  bool Consumed = false;
  for (Token Tok = peek(); Tok.kind() != tok_eof; Tok = peek()) {
    // Use the multiset's own ordered lookup instead of a linear std::find
    // over the whole container.
    if (SyncTokens.count(Tok.kind()) != 0)
      break;
    Consumed = true;
    consume();
  }
  if (!Consumed)
    return std::nullopt;
  // At least one consume() ran, so LastToken holds the last skipped token.
  assert(LastToken && "LastToken should be set after consume()");
  return RangeTy{Begin, LastToken->end()};
}

/// \brief Scoped registration of a sync token.
///
/// The constructor adds \p Kind to Parser::SyncTokens; the destructor removes
/// exactly that one registration again, so registrations of the same kind
/// made by enclosing scopes stay in effect.
class SyncRAII {
  Parser &P;
  TokenKind Kind;

public:
  SyncRAII(Parser &P, TokenKind Kind) : P(P), Kind(Kind) {
    P.SyncTokens.emplace(Kind);
  }

  // A copy would unregister the same token a second time.
  SyncRAII(const SyncRAII &) = delete;
  SyncRAII &operator=(const SyncRAII &) = delete;

  ~SyncRAII() {
    // multiset::erase(key) removes *every* element equal to the key. Guards
    // nest (e.g. parseExprAttrs registers `}` and then calls expect(`}`),
    // which registers it again), so erase a single element by iterator.
    auto It = P.SyncTokens.find(Kind);
    if (It != P.SyncTokens.end())
      P.SyncTokens.erase(It);
  }
};

/// \brief Register \p Kind as a sync token for as long as the returned guard
/// object is alive.
SyncRAII withSync(TokenKind Kind) { return SyncRAII(*this, Kind); }

class ExpectResult {
bool Success;
std::optional<Token> Tok;
Diagnostic *DiagMissing;

public:
ExpectResult(Token Tok) : Success(true), Tok(Tok), DiagMissing(nullptr) {}
ExpectResult(Diagnostic *DiagMissing)
: Success(false), DiagMissing(DiagMissing) {}

[[nodiscard]] bool success() const { return Success; }
[[nodiscard]] Token tok() const {
assert(Tok);
return *Tok;
}
[[nodiscard]] Diagnostic &diag() const {
assert(DiagMissing);
return *DiagMissing;
}
};

/// \brief Expect the next token to be of kind \p Kind, with error recovery.
///
/// 1. If the next token matches, it is consumed and returned.
/// 2. Otherwise tokens up to the next sync token are skipped and reported as
///    "unexpected text" (with a removal fix); if the expected token follows,
///    it is consumed and returned.
/// 3. Otherwise an "expected ..." diagnostic with an insertion fix is
///    emitted and returned through the result.
///
/// NOTE(review): the failed result stores a pointer to an element of Diags;
/// presumably it is only valid until the next diagnostic is appended --
/// confirm against the container type of Diags.
ExpectResult expect(TokenKind Kind) {
  auto Sync = withSync(Kind);

  // Take the next token if, and only if, it is the one we are looking for.
  auto TryTake = [this, Kind]() -> std::optional<Token> {
    Token Next = peek();
    if (Next.kind() != Kind)
      return std::nullopt;
    consume();
    return Next;
  };

  if (std::optional<Token> Found = TryTake())
    return *Found;

  // UNKNOWN ?
  // ~~~~~~~ consider removing the unexpected text
  if (std::optional<RangeTy> Skipped = consumeAsUnknown()) {
    Diagnostic &Unexpected =
        Diags.emplace_back(Diagnostic::DK_UnexpectedText, *Skipped);
    Unexpected.fix("remove unexpected text")
        .edit(TextEdit::mkRemoval(*Skipped));

    if (std::optional<Token> Found = TryTake())
      return *Found;
    // The expected token is still absent: fall through and report it as
    // missing too (two errors in total).
  }

  // expected Kind
  Point InsertAt = LastToken ? LastToken->end() : peek().begin();
  std::string Spelling(tok::spelling(Kind));
  Diagnostic &Missing =
      Diags.emplace_back(Diagnostic::DK_Expected, RangeTy(InsertAt));
  Missing << Spelling;
  Missing.fix("insert " + Spelling)
      .edit(TextEdit::mkInsertion(InsertAt, Spelling));
  return {&Missing};
}

void consume() {
if (LookAheadBuf.empty())
peek(0);
Expand All @@ -131,23 +226,17 @@ class Parser {
Token TokDollarCurly = peek();
assert(TokDollarCurly.kind() == tok_dollar_curly);
consume(); // ${
auto Sync = withSync(tok_r_curly);
assert(LastToken);
/* with(PS_Expr) */ {
auto ExprState = withState(PS_Expr);
auto Expr = parseExpr();
if (!Expr)
diagNullExpr(Diags, LastToken->end(), "interpolation");
if (peek().kind() == tok_r_curly) {
consume(); // }
} else {
// expected "}" for interpolation
Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
RangeTy(LastToken->end()));
D << std::string(tok::spelling(tok_r_curly));
D.note(Note::NK_ToMachThis, TokDollarCurly.range())
ExpectResult ER = expect(tok_r_curly); // }
if (!ER.success())
ER.diag().note(Note::NK_ToMachThis, TokDollarCurly.range())
<< std::string(tok::spelling(tok_dollar_curly));
D.fix("insert }").edit(TextEdit::mkInsertion(LastToken->end(), "}"));
}
return Expr;
} // with(PS_Expr)
return nullptr;
Expand Down Expand Up @@ -234,25 +323,17 @@ class Parser {
assert(Quote.kind() == QuoteKind && "should be a quote");
// Consume the quote and so make the look-ahead buf empty.
consume();
auto Sync = withSync(QuoteKind);
assert(LastToken && "LastToken should be set after consume()");
/* with(PS_String / PS_IndString) */ {
auto StringState = withState(IsIndented ? PS_IndString : PS_String);
std::shared_ptr<InterpolatedParts> Parts = parseStringParts();
if (Token EndTok = peek(); EndTok.kind() == QuoteKind) {
consume();
ExpectResult ER = expect(QuoteKind);
if (ER.success()) {
return std::make_shared<ExprString>(
RangeTy{
Quote.begin(),
EndTok.end(),
},
std::move(Parts));
RangeTy{Quote.begin(), ER.tok().end()}, std::move(Parts));
}
Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
RangeTy(LastToken->end()));
D << QuoteSpel;
D.note(Note::NK_ToMachThis, Quote.range()) << QuoteSpel;
D.fix("insert " + QuoteSpel)
.edit(TextEdit::mkInsertion(LastToken->end(), QuoteSpel));
ER.diag().note(Note::NK_ToMachThis, Quote.range()) << QuoteSpel;
return std::make_shared<ExprString>(
RangeTy{
Quote.begin(),
Expand All @@ -269,36 +350,23 @@ class Parser {
auto LParen = std::make_shared<Misc>(L.range());
assert(L.kind() == tok_l_paren);
consume(); // (
auto Sync = withSync(tok_r_paren);
assert(LastToken && "LastToken should be set after consume()");
auto Expr = parseExpr();
if (!Expr)
diagNullExpr(Diags, LastToken->end(), "parenthesized");
if (Token R = peek(); R.kind() == tok_r_paren) {
consume(); // )
auto RParen = std::make_shared<Misc>(R.range());
return std::make_shared<ExprParen>(
RangeTy{
L.begin(),
R.end(),
},
std::move(Expr), std::move(LParen), std::move(RParen));
ExpectResult ER = expect(tok_r_paren); // )
if (ER.success()) {
auto RParen = std::make_shared<Misc>(ER.tok().range());
return std::make_shared<ExprParen>(RangeTy{L.begin(), ER.tok().end()},
std::move(Expr), std::move(LParen),
std::move(RParen));
}

// Missing ")"
Diagnostic &D =
Diags.emplace_back(Diagnostic::DK_Expected, RangeTy(LastToken->end()));
D << std::string(tok::spelling(tok_r_paren));
D.note(Note::NK_ToMachThis, L.range())
ER.diag().note(Note::NK_ToMachThis, L.range())
<< std::string(tok::spelling(tok_l_paren));
D.fix("insert )")
.edit(TextEdit::mkInsertion(LastToken->end(),
std::string(tok::spelling(tok_r_paren))));
return std::make_shared<ExprParen>(
RangeTy{
L.begin(),
LastToken->end(),
},
std::move(Expr), std::move(LParen), /*RParen=*/nullptr);
return std::make_shared<ExprParen>(RangeTy{L.begin(), LastToken->end()},
std::move(Expr), std::move(LParen),
/*RParen=*/nullptr);
}

// attrname : ID
Expand Down Expand Up @@ -354,12 +422,8 @@ class Parser {
}
break;
}
return std::make_shared<AttrPath>(
RangeTy{
Begin,
LastToken->end(),
},
std::move(AttrNames));
return std::make_shared<AttrPath>(RangeTy{Begin, LastToken->end()},
std::move(AttrNames));
}

// binding : attrpath '=' expr ';'
Expand All @@ -368,15 +432,9 @@ class Parser {
if (!Path)
return nullptr;
assert(LastToken && "LastToken should be set after valid attrpath");
if (Token Tok = peek(); Tok.kind() == tok_eq) {
consume();
} else {
// expected "=" for binding
Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
RangeTy(LastToken->end()));
D << std::string(tok::spelling(tok_eq));
D.fix("insert =").edit(TextEdit::mkInsertion(LastToken->end(), "="));
}
auto SyncEq = withSync(tok_eq);
auto SyncSemi = withSync(tok_semi_colon);
expect(tok_eq);
auto Expr = parseExpr();
if (!Expr)
diagNullExpr(Diags, LastToken->end(), "binding");
Expand All @@ -391,12 +449,8 @@ class Parser {
D << std::string(tok::spelling(tok_semi_colon));
D.fix("insert ;").edit(TextEdit::mkInsertion(LastToken->end(), ";"));
}
return std::make_shared<Binding>(
RangeTy{
Path->begin(),
LastToken->end(),
},
std::move(Path), std::move(Expr));
return std::make_shared<Binding>(RangeTy{Path->begin(), LastToken->end()},
std::move(Path), std::move(Expr));
}

// binds : ( binding | inherit )*
Expand All @@ -416,12 +470,8 @@ class Parser {
}
break;
}
return std::make_shared<Binds>(
RangeTy{
Begin,
LastToken->end(),
},
std::move(Bindings));
return std::make_shared<Binds>(RangeTy{Begin, LastToken->end()},
std::move(Bindings));
}

// attrset_expr : REC? '{' binds '}'
Expand All @@ -430,6 +480,8 @@ class Parser {
std::shared_ptr<ExprAttrs> parseExprAttrs() {
std::shared_ptr<Misc> Rec;

auto Sync = withSync(tok_r_curly);

// "to match this ..."
// if "{" is missing, then use "rec", otherwise use "{"
Token Matcher = peek();
Expand All @@ -452,17 +504,10 @@ class Parser {
}
assert(LastToken && "LastToken should be set after valid { or rec");
auto Binds = parseBinds();
if (Token Tok = peek(); Tok.kind() == tok_r_curly) {
consume();
} else {
// expected "}" for attrset
Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
RangeTy(LastToken->range()));
D << std::string(tok::spelling(tok_r_curly));
D.note(Note::NK_ToMachThis, Matcher.range())
ExpectResult ER = expect(tok_r_curly);
if (!ER.success())
ER.diag().note(Note::NK_ToMachThis, Matcher.range())
<< std::string(tok::spelling(Matcher.kind()));
D.fix("insert }").edit(TextEdit::mkInsertion(LastToken->end(), "}"));
}
return std::make_shared<ExprAttrs>(RangeTy{Begin, LastToken->end()},
std::move(Binds), std::move(Rec));
}
Expand Down
16 changes: 16 additions & 0 deletions libnixf/test/Parse/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,4 +560,20 @@ TEST(Parser, ExprVar) {
ASSERT_EQ(Diags.size(), 0);
}

// Smoke test: an attribute set containing stray `)))` tokens and a malformed
// binding (`a asd = 1;`) must still parse to a non-null AST.
// NOTE(review): presumably exercises the sync-token error recovery -- confirm.
TEST(Parser, SyncAttrs) {
auto Src = R"(
rec {
)))
a asd = 1;
}
)"sv;

std::vector<Diagnostic> Diags;
auto AST = nixf::parse(Src, Diags);

// Only the existence of a tree is checked for now.
ASSERT_TRUE(AST);

// TODO: also assert on the diagnostics collected in Diags.
}

} // namespace

0 comments on commit a6dd59c

Please sign in to comment.