diff --git a/libnixf/include/nixf/Basic/DiagnosticKinds.inc b/libnixf/include/nixf/Basic/DiagnosticKinds.inc
index b45614c89..361c3b9c5 100644
--- a/libnixf/include/nixf/Basic/DiagnosticKinds.inc
+++ b/libnixf/include/nixf/Basic/DiagnosticKinds.inc
@@ -11,7 +11,7 @@ DIAG("lex-float-leading-zero", FloatLeadingZero, Warning,
 DIAG("parse-expected", Expected, Error, "expected {}")
 DIAG("parse-attrpath-extra-dot", AttrPathExtraDot, Error, "extra `.` at the end of attrpath")
 DIAG("parse-unexpected-between", UnexpectedBetween, Error, "unexpected {} between {} and {}")
-DIAG("parse-unexpected", UnexpectedText, Error, "unexpected text {}")
+DIAG("parse-unexpected", UnexpectedText, Error, "unexpected text")
 DIAG("parse-missing-sep-formals", MissingSepFormals, Error, "missing seperator `,` between two lambda formals")
 
 DIAG("let-dynamic", LetDynamic, Error, "dynamic attributes are not allowed in let ... in ... expression")
diff --git a/libnixf/include/nixf/Basic/Range.h b/libnixf/include/nixf/Basic/Range.h
index 280f6b37d..f4c0e11e9 100644
--- a/libnixf/include/nixf/Basic/Range.h
+++ b/libnixf/include/nixf/Basic/Range.h
@@ -25,6 +25,10 @@ class Point {
   }
   Point() = default;
 
+  Point(const Point &) = default;
+  Point &operator=(const Point &) = default;
+  Point(Point &&) = default;
+
   /// \brief Check if the point is at the given position.
   [[nodiscard]] bool isAt(int64_t Line, int64_t Column,
                           std::size_t Offset) const {
diff --git a/libnixf/src/Parse/Parser.cpp b/libnixf/src/Parse/Parser.cpp
index acb25d1cb..514f08a7f 100644
--- a/libnixf/src/Parse/Parser.cpp
+++ b/libnixf/src/Parse/Parser.cpp
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -49,6 +50,18 @@ class Parser {
   std::optional LastToken;
   std::stack State;
 
+  /// \brief Sync tokens for error recovery.
+  ///
+  /// These tokens will be considered as the end of an "unknown" node.
+  /// We create an "unknown" node to recover from "extra" token errors.
+  /// (Also, this node is invisible in the AST)
+  ///
+  /// e.g. { foo....bar = ; }
+  ///            ^~~ remove these tokens
+  ///
+  /// Sync tokens will not be eaten as "unknown".
+  std::multiset SyncTokens;
+
   class StateRAII {
     Parser &P;
 
@@ -106,6 +119,103 @@ class Parser {
     return LookAheadBuf[N];
   }
 
+  /// \brief Consume tokens until the next sync token.
+  /// \returns The consumed range. If no token is consumed, return nullopt.
+  std::optional consumeAsUnknown() {
+    Point Begin = peek().begin();
+    bool Consumed = false;
+    for (Token Tok = peek(); Tok.kind() != tok_eof; Tok = peek()) {
+      if (SyncTokens.contains(Tok.kind()))
+        break;
+      Consumed = true;
+      consume();
+    }
+    if (!Consumed)
+      return std::nullopt;
+    assert(LastToken && "LastToken should be set after consume()");
+    return RangeTy{Begin, LastToken->end()};
+  }
+
+  /// \brief Scoped registration of a sync token.
+  ///
+  /// Adds \p Kind to `SyncTokens` on construction and removes that single
+  /// registration again on destruction.
+  class SyncRAII {
+    Parser &P;
+    TokenKind Kind;
+
+  public:
+    SyncRAII(Parser &P, TokenKind Kind) : P(P), Kind(Kind) {
+      P.SyncTokens.emplace(Kind);
+    }
+    ~SyncRAII() {
+      // `multiset::erase(key)` removes *every* element equal to the key, which
+      // would also drop registrations still owned by enclosing scopes (nested
+      // `expect()` / `withSync()` calls on the same kind). Erase only one.
+      if (auto It = P.SyncTokens.find(Kind); It != P.SyncTokens.end())
+        P.SyncTokens.erase(It);
+    }
+  };
+
+  /// \brief Register \p Kind as a sync token for the lifetime of the result.
+  SyncRAII withSync(TokenKind Kind) { return {*this, Kind}; }
+
+  /// \brief Outcome of `expect()`: either the matched token, or the
+  /// "expected ..." diagnostic that was emitted because it is missing.
+  class ExpectResult {
+    bool Success;
+    std::optional Tok;
+    Diagnostic *DiagMissing;
+
+  public:
+    ExpectResult(Token Tok) : Success(true), Tok(Tok), DiagMissing(nullptr) {}
+    ExpectResult(Diagnostic *DiagMissing)
+        : Success(false), DiagMissing(DiagMissing) {}
+
+    [[nodiscard]] bool ok() const { return Success; }
+    [[nodiscard]] Token tok() const {
+      assert(Tok);
+      return *Tok;
+    }
+    [[nodiscard]] Diagnostic &diag() const {
+      assert(DiagMissing);
+      return *DiagMissing;
+    }
+  };
+
+  /// \brief Expect the next token to be \p Kind (without consuming it).
+  ///
+  /// On mismatch, tokens up to the next sync token are reported as
+  /// "unexpected text"; if \p Kind still does not follow, an "expected"
+  /// diagnostic with an insertion fix is emitted and returned.
+  ExpectResult expect(TokenKind Kind) {
+    auto Sync = withSync(Kind);
+    if (Token Tok = peek(); Tok.kind() == Kind) {
+      return Tok;
+    }
+    // UNKNOWN ?
+    // ~~~~~~~ consider removing unexpected text
+    if (std::optional UnknownRange = consumeAsUnknown()) {
+      Diagnostic &D =
+          Diags.emplace_back(Diagnostic::DK_UnexpectedText, *UnknownRange);
+      D.fix("remove unexpected text").edit(TextEdit::mkRemoval(*UnknownRange));
+
+      if (Token Tok = peek(); Tok.kind() == Kind) {
+        return Tok;
+      }
+      // If the next token is not the expected one, then insert it.
+      // (we have two errors now).
+    }
+    // expected Kind
+    Point Insert = LastToken ? LastToken->end() : peek().begin();
+    Diagnostic &D =
+        Diags.emplace_back(Diagnostic::DK_Expected, RangeTy(Insert));
+    D << std::string(tok::spelling(Kind));
+    D.fix("insert " + std::string(tok::spelling(Kind)))
+        .edit(TextEdit::mkInsertion(Insert, std::string(tok::spelling(Kind))));
+    return {&D};
+  }
+
   void consume() {
     if (LookAheadBuf.empty())
       peek(0);
@@ -131,22 +241,18 @@ class Parser {
     Token TokDollarCurly = peek();
     assert(TokDollarCurly.kind() == tok_dollar_curly);
     consume(); // ${
+    auto Sync = withSync(tok_r_curly);
     assert(LastToken);
     /* with(PS_Expr) */ {
       auto ExprState = withState(PS_Expr);
       auto Expr = parseExpr();
       if (!Expr)
         diagNullExpr(Diags, LastToken->end(), "interpolation");
-      if (peek().kind() == tok_r_curly) {
+      if (ExpectResult ER = expect(tok_r_curly); ER.ok()) {
         consume(); // }
       } else {
-        // expected "}" for interpolation
-        Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
-                                           RangeTy(LastToken->end()));
-        D << std::string(tok::spelling(tok_r_curly));
-        D.note(Note::NK_ToMachThis, TokDollarCurly.range())
+        ER.diag().note(Note::NK_ToMachThis, TokDollarCurly.range())
             << std::string(tok::spelling(tok_dollar_curly));
-        D.fix("insert }").edit(TextEdit::mkInsertion(LastToken->end(), "}"));
       }
       return Expr;
     } // with(PS_Expr)
@@ -234,31 +340,24 @@ class Parser {
     assert(Quote.kind() == QuoteKind && "should be a quote");
     // Consume the quote and so make the look-ahead buf empty.
     consume();
+    auto Sync = withSync(QuoteKind);
     assert(LastToken && "LastToken should be set after consume()");
     /* with(PS_String / PS_IndString) */ {
       auto StringState = withState(IsIndented ? PS_IndString : PS_String);
       std::shared_ptr Parts = parseStringParts();
-      if (Token EndTok = peek(); EndTok.kind() == QuoteKind) {
+      if (ExpectResult ER = expect(QuoteKind); ER.ok()) {
         consume();
+        return std::make_shared(
+            RangeTy{Quote.begin(), ER.tok().end()}, std::move(Parts));
+      } else { // NOLINT(readability-else-after-return)
+        ER.diag().note(Note::NK_ToMachThis, Quote.range()) << QuoteSpel;
         return std::make_shared(
             RangeTy{
                 Quote.begin(),
-                EndTok.end(),
+                Parts->end(),
             },
             std::move(Parts));
       }
-      Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
-                                         RangeTy(LastToken->end()));
-      D << QuoteSpel;
-      D.note(Note::NK_ToMachThis, Quote.range()) << QuoteSpel;
-      D.fix("insert " + QuoteSpel)
-          .edit(TextEdit::mkInsertion(LastToken->end(), QuoteSpel));
-      return std::make_shared(
-          RangeTy{
-              Quote.begin(),
-              Parts->end(),
-          },
-          std::move(Parts));
     } // with(PS_String / PS_IndString)
   }
 
@@ -269,36 +368,24 @@ class Parser {
     auto LParen = std::make_shared(L.range());
     assert(L.kind() == tok_l_paren);
     consume(); // (
+    auto Sync = withSync(tok_r_paren);
     assert(LastToken && "LastToken should be set after consume()");
     auto Expr = parseExpr();
     if (!Expr)
       diagNullExpr(Diags, LastToken->end(), "parenthesized");
-    if (Token R = peek(); R.kind() == tok_r_paren) {
+    if (ExpectResult ER = expect(tok_r_paren); ER.ok()) {
       consume(); // )
-      auto RParen = std::make_shared(R.range());
-      return std::make_shared(
-          RangeTy{
-              L.begin(),
-              R.end(),
-          },
-          std::move(Expr), std::move(LParen), std::move(RParen));
+      auto RParen = std::make_shared(ER.tok().range());
+      return std::make_shared(RangeTy{L.begin(), ER.tok().end()},
+                              std::move(Expr), std::move(LParen),
+                              std::move(RParen));
+    } else { // NOLINT(readability-else-after-return)
+      ER.diag().note(Note::NK_ToMachThis, L.range())
+          << std::string(tok::spelling(tok_l_paren));
+      return std::make_shared(RangeTy{L.begin(), LastToken->end()},
+                              std::move(Expr), std::move(LParen),
+                              /*RParen=*/nullptr);
     }
-
-    // Missing ")"
-    Diagnostic &D =
-        Diags.emplace_back(Diagnostic::DK_Expected, RangeTy(LastToken->end()));
-    D << std::string(tok::spelling(tok_r_paren));
-    D.note(Note::NK_ToMachThis, L.range())
-        << std::string(tok::spelling(tok_l_paren));
-    D.fix("insert )")
-        .edit(TextEdit::mkInsertion(LastToken->end(),
-                                    std::string(tok::spelling(tok_r_paren))));
-    return std::make_shared(
-        RangeTy{
-            L.begin(),
-            LastToken->end(),
-        },
-        std::move(Expr), std::move(LParen), /*RParen=*/nullptr);
   }
 
   // attrname : ID
@@ -354,12 +441,8 @@ class Parser {
       }
       break;
     }
-    return std::make_shared(
-        RangeTy{
-            Begin,
-            LastToken->end(),
-        },
-        std::move(AttrNames));
+    return std::make_shared(RangeTy{Begin, LastToken->end()},
+                            std::move(AttrNames));
   }
 
   // binding : attrpath '=' expr ';'
@@ -368,15 +451,10 @@ class Parser {
     if (!Path)
       return nullptr;
     assert(LastToken && "LastToken should be set after valid attrpath");
-    if (Token Tok = peek(); Tok.kind() == tok_eq) {
+    auto SyncEq = withSync(tok_eq);
+    auto SyncSemi = withSync(tok_semi_colon);
+    if (ExpectResult ER = expect(tok_eq); ER.ok())
       consume();
-    } else {
-      // expected "=" for binding
-      Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
-                                         RangeTy(LastToken->end()));
-      D << std::string(tok::spelling(tok_eq));
-      D.fix("insert =").edit(TextEdit::mkInsertion(LastToken->end(), "="));
-    }
     auto Expr = parseExpr();
     if (!Expr)
       diagNullExpr(Diags, LastToken->end(), "binding");
@@ -391,12 +469,8 @@ class Parser {
       D << std::string(tok::spelling(tok_semi_colon));
       D.fix("insert ;").edit(TextEdit::mkInsertion(LastToken->end(), ";"));
     }
-    return std::make_shared(
-        RangeTy{
-            Path->begin(),
-            LastToken->end(),
-        },
-        std::move(Path), std::move(Expr));
+    return std::make_shared(RangeTy{Path->begin(), LastToken->end()},
+                            std::move(Path), std::move(Expr));
   }
 
   // binds : ( binding | inherit )*
@@ -416,12 +490,8 @@ class Parser {
       }
       break;
     }
-    return std::make_shared(
-        RangeTy{
-            Begin,
-            LastToken->end(),
-        },
-        std::move(Bindings));
+    return std::make_shared(RangeTy{Begin, LastToken->end()},
+                            std::move(Bindings));
   }
 
   // attrset_expr : REC? '{' binds '}'
@@ -430,6 +500,8 @@ class Parser {
   std::shared_ptr parseExprAttrs() {
     std::shared_ptr Rec;
 
+    auto Sync = withSync(tok_r_curly);
+
     // "to match this ..."
     // if "{" is missing, then use "rec", otherwise use "{"
     Token Matcher = peek();
@@ -452,17 +524,11 @@ class Parser {
     }
     assert(LastToken && "LastToken should be set after valid { or rec");
     auto Binds = parseBinds();
-    if (Token Tok = peek(); Tok.kind() == tok_r_curly) {
+    if (ExpectResult ER = expect(tok_r_curly); ER.ok())
       consume();
-    } else {
-      // expected "}" for attrset
-      Diagnostic &D = Diags.emplace_back(Diagnostic::DK_Expected,
-                                         RangeTy(LastToken->range()));
-      D << std::string(tok::spelling(tok_r_curly));
-      D.note(Note::NK_ToMachThis, Matcher.range())
+    else
+      ER.diag().note(Note::NK_ToMachThis, Matcher.range())
           << std::string(tok::spelling(Matcher.kind()));
-      D.fix("insert }").edit(TextEdit::mkInsertion(LastToken->end(), "}"));
-    }
     return std::make_shared(RangeTy{Begin, LastToken->end()},
                             std::move(Binds), std::move(Rec));
   }
diff --git a/libnixf/test/Parse/Parser.cpp b/libnixf/test/Parse/Parser.cpp
index 4a714a148..714a13df4 100644
--- a/libnixf/test/Parse/Parser.cpp
+++ b/libnixf/test/Parse/Parser.cpp
@@ -560,4 +560,39 @@ TEST(Parser, ExprVar) {
   ASSERT_EQ(Diags.size(), 0);
 }
 
+TEST(Parser, SyncAttrs) {
+  // NOTE(review): whitespace inside this raw string was reconstructed to
+  // match the offsets asserted below -- confirm against the original test.
+  auto Src = R"(
+rec {
+  )))
+  a  asd = 1;
+}
+  )"sv;
+
+  std::vector Diags;
+  auto AST = nixf::parse(Src, Diags);
+
+  ASSERT_TRUE(AST);
+
+  ASSERT_EQ(Diags.size(), 1);
+  const auto &D = Diags[0];
+  ASSERT_TRUE(D.range().begin().isAt(2, 2, 9));
+  ASSERT_TRUE(D.range().end().isAt(3, 13, 26));
+  ASSERT_EQ(D.kind(), Diagnostic::DK_UnexpectedText);
+  ASSERT_EQ(D.args().size(), 0);
+
+  // Check the note.
+  ASSERT_EQ(D.notes().size(), 0);
+
+  // Check fix-it hints.
+  ASSERT_EQ(D.fixes().size(), 1);
+  ASSERT_EQ(D.fixes()[0].edits().size(), 1);
+  ASSERT_EQ(D.fixes()[0].message(), "remove unexpected text");
+  const auto &F = D.fixes()[0].edits()[0];
+  ASSERT_TRUE(F.oldRange().begin().isAt(2, 2, 9));
+  ASSERT_TRUE(F.oldRange().end().isAt(3, 13, 26));
+  ASSERT_EQ(F.newText(), "");
+}
+
 } // namespace