From 205e139e3f985d6c5a171b16ddff3f548b7ae387 Mon Sep 17 00:00:00 2001 From: Yingchi Long Date: Sat, 16 Sep 2023 01:32:19 +0800 Subject: [PATCH] nixd/Syntax: init (WIP) --- nixd/include/nixd/Parser/Epilogue.cpp | 48 --- nixd/include/nixd/Parser/Parser.h | 63 ---- nixd/include/nixd/Parser/Prologue.cpp | 281 ------------------ nixd/include/nixd/Parser/Provides.h | 17 -- nixd/include/nixd/Parser/Require.h | 70 ----- .../Prologue.h => Syntax/Lexer/Prologue.cpp} | 5 +- nixd/include/nixd/Syntax/Nodes.h | 64 ++++ nixd/include/nixd/Syntax/Parser/Prologue.cpp | 10 + nixd/include/nixd/Syntax/Parser/Require.h | 38 +++ nixd/lib/Syntax/Lexer.l | 235 +++++++++++++++ nixd/lib/Syntax/Parser.y | 186 ++++++++++++ nixd/lib/Syntax/meson.build | 38 +++ nixd/lib/meson.build | 3 +- 13 files changed, 575 insertions(+), 483 deletions(-) delete mode 100644 nixd/include/nixd/Parser/Epilogue.cpp delete mode 100644 nixd/include/nixd/Parser/Parser.h delete mode 100644 nixd/include/nixd/Parser/Prologue.cpp delete mode 100644 nixd/include/nixd/Parser/Provides.h delete mode 100644 nixd/include/nixd/Parser/Require.h rename nixd/include/nixd/{Lexer/Prologue.h => Syntax/Lexer/Prologue.cpp} (93%) create mode 100644 nixd/include/nixd/Syntax/Nodes.h create mode 100644 nixd/include/nixd/Syntax/Parser/Prologue.cpp create mode 100644 nixd/include/nixd/Syntax/Parser/Require.h create mode 100644 nixd/lib/Syntax/Lexer.l create mode 100644 nixd/lib/Syntax/Parser.y create mode 100644 nixd/lib/Syntax/meson.build diff --git a/nixd/include/nixd/Parser/Epilogue.cpp b/nixd/include/nixd/Parser/Epilogue.cpp deleted file mode 100644 index 2fb356a5c..000000000 --- a/nixd/include/nixd/Parser/Epilogue.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once - -#include "Parser.tab.h" - -#include "Lexer.tab.h" - -#include "nixd/Expr/Expr.h" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace nixd { - -using namespace nix; - -std::unique_ptr parse(char *text, size_t length, Pos::Origin origin, - const SourcePath &basePath, ParseState state) { - yyscan_t scanner; - std::unique_ptr data = std::unique_ptr(new ParseData{ - .state = - { - .symbols = state.symbols, - .positions = state.positions, - }, - .basePath = std::move(basePath), - .origin = {origin}, - }); - - yylex_init(&scanner); - yy_scan_buffer(text, length, scanner); - yyparse(scanner, data.get()); - yylex_destroy(scanner); - data->STable = std::make_unique(state.symbols); - data->PTable = std::make_unique(state.positions); - return data; // NRVO -} - -} // namespace nixd diff --git a/nixd/include/nixd/Parser/Parser.h b/nixd/include/nixd/Parser/Parser.h deleted file mode 100644 index 9ccb6a565..000000000 --- a/nixd/include/nixd/Parser/Parser.h +++ /dev/null @@ -1,63 +0,0 @@ -#pragma once - -#include "Parser.tab.h" - -#include "nixd/Parser/Require.h" - -#include -#include - -#include -#include - -namespace nixd { - -std::unique_ptr parse(char *Text, size_t Length, - nix::Pos::Origin Origin, - const nix::SourcePath &BasePath, - ParseState State); - -inline std::unique_ptr parse(char *Text, size_t Length, - nix::Pos::Origin Origin, - const nix::SourcePath &BasePath) { - nix::SymbolTable Symbols; - nix::PosTable Positions; - ParseState State{Symbols, Positions}; - return parse(Text, Length, std::move(Origin), BasePath, State); -} - -inline std::unique_ptr parse(std::string Text, - nix::Pos::Origin Origin, - const nix::SourcePath &BasePath) { - Text.append("\0\0", 2); - return parse(Text.data(), Text.length(), std::move(Origin), BasePath); -} - -inline std::unique_ptr parse(std::string Text, - const std::string &Path) { - Text.append("\0\0", 2); - auto FSPath = std::filesystem::path(Path); - auto Origin = nix::CanonPath(FSPath.string()); - auto BasePath = nix::CanonPath(FSPath.remove_filename().string()); - return parse(Text.data(), Text.length(), std::move(Origin), - std::move(BasePath)); -} - -inline std::unique_ptr parse(char *Text, size_t Length, - nix::Pos::Origin Origin, - const nix::SourcePath &BasePath, - nix::EvalState &State) { - auto Data = parse(Text, Length, std::move(Origin), BasePath, - ParseState{State.symbols, State.positions}); - return Data; -} - -inline std::unique_ptr parse(std::string Text, - nix::Pos::Origin Origin, - const nix::SourcePath &BasePath, - nix::EvalState &State) { - Text.append("\0\0", 2); - return parse(Text.data(), Text.length(), std::move(Origin), BasePath, State); -} - -} // namespace nixd diff --git a/nixd/include/nixd/Parser/Prologue.cpp b/nixd/include/nixd/Parser/Prologue.cpp deleted file mode 100644 index 75da3986e..000000000 --- a/nixd/include/nixd/Parser/Prologue.cpp +++ /dev/null @@ -1,281 +0,0 @@ - -#pragma once - -#include "Parser.tab.h" - -#include "Lexer.tab.h" - -#include "Provides.h" -#include "Require.h" - -#include -#include -#include - -YY_DECL; - -namespace nixd { - -using nix::absPath; -using nix::AttrName; -using nix::AttrPath; -using nix::Error; -using nix::ErrorInfo; -using nix::evalSettings; -using nix::experimentalFeatureSettings; -using nix::Expr; -using nix::Formal; -using nix::Formals; -using nix::getHome; -using nix::hintfmt; -using nix::noPos; -using nix::Path; -using nix::PosIdx; -using nix::PosTable; -using nix::SourcePath; -using nix::Symbol; -using nix::SymbolTable; -using nix::Xp; - -using namespace nixd::nodes; - -static void dupAttr(ParseData &data, const AttrPath &attrPath, const PosIdx pos, - const PosIdx prevPos) { - data.error.emplace_back( - nix::ErrorInfo{.msg = hintfmt("attribute '%1%' already defined at %2%", - showAttrPath(data.state.symbols, attrPath), - data.state.positions[prevPos]), - .errPos = data.state.positions[pos]}); -} - -static void dupAttr(ParseData &data, Symbol attr, const PosIdx pos, - const PosIdx prevPos) { - data.error.emplace_back(nix::ErrorInfo{ - .msg = hintfmt("attribute '%1%' already defined at %2%", - data.state.symbols[attr], data.state.positions[prevPos]), - .errPos = data.state.positions[pos]}); -} - -static void addAttr(nix::ExprAttrs *attrs, AttrPath &&attrPath, nix::Expr *e, - const nix::PosIdx pos, ParseData &data) { - AttrPath::iterator i; - // All attrpaths have at least one attr - assert(!attrPath.empty()); - // Checking attrPath validity. - // =========================== - for (i = attrPath.begin(); i + 1 < attrPath.end(); i++) { - if (i->symbol) { - ExprAttrs::AttrDefs::iterator j = attrs->attrs.find(i->symbol); - if (j != attrs->attrs.end()) { - if (!j->second.inherited) { - ExprAttrs *attrs2 = dynamic_cast(j->second.e); - if (!attrs2) { - - dupAttr(data, attrPath, pos, j->second.pos); - return; - } - attrs = attrs2; - } else { - - dupAttr(data, attrPath, pos, j->second.pos); - return; - } - } else { - ExprAttrs *nested = data.ctx.record(new ExprAttrs); - attrs->attrs[i->symbol] = ExprAttrs::AttrDef(nested, pos); - attrs = nested; - } - } else { - ExprAttrs *nested = data.ctx.record(new ExprAttrs); - attrs->dynamicAttrs.push_back( - ExprAttrs::DynamicAttrDef(i->expr, nested, pos)); - attrs = nested; - } - } - // Expr insertion. - // ========================== - if (i->symbol) { - ExprAttrs::AttrDefs::iterator j = attrs->attrs.find(i->symbol); - if (j != attrs->attrs.end()) { - // This attr path is already defined. However, if both - // e and the expr pointed by the attr path are two attribute sets, - // we want to merge them. - // Otherwise, throw an error. - auto ae = dynamic_cast(e); - auto jAttrs = dynamic_cast(j->second.e); - if (jAttrs && ae) { - for (auto &ad : ae->attrs) { - auto j2 = jAttrs->attrs.find(ad.first); - if (j2 != jAttrs->attrs.end()) { - // Attr already defined in iAttrs, error. - - dupAttr(data, ad.first, j2->second.pos, ad.second.pos); - return; - } - jAttrs->attrs.emplace(ad.first, ad.second); - } - } else { - - dupAttr(data, attrPath, pos, j->second.pos); - return; - } - } else { - // This attr path is not defined. Let's create it. - attrs->attrs.emplace(i->symbol, ExprAttrs::AttrDef(e, pos)); - e->setName(i->symbol); - } - } else { - attrs->dynamicAttrs.push_back(ExprAttrs::DynamicAttrDef(i->expr, e, pos)); - } -} - -static Formals *toFormals(ParseData &data, ParserFormals *formals, - PosIdx pos = nix::noPos, Symbol arg = {}) { - std::sort(formals->formals.begin(), formals->formals.end(), - [](const auto &a, const auto &b) { - return std::tie(a.name, a.pos) < std::tie(b.name, b.pos); - }); - - std::optional> duplicate; - for (size_t i = 0; i + 1 < formals->formals.size(); i++) { - if (formals->formals[i].name != formals->formals[i + 1].name) - continue; - std::pair thisDup{formals->formals[i].name, formals->formals[i + 1].pos}; - duplicate = std::min(thisDup, duplicate.value_or(thisDup)); - } - if (duplicate) - data.error.emplace_back(nix::ErrorInfo{ - .msg = hintfmt("duplicate formal function argument '%1%'", - data.state.symbols[duplicate->first]), - .errPos = data.state.positions[duplicate->second]}); - - Formals result; - result.ellipsis = formals->ellipsis; - result.formals = std::move(formals->formals); - - if (arg && result.has(arg)) - data.error.emplace_back(nix::ErrorInfo{ - .msg = hintfmt("duplicate formal function argument '%1%'", - data.state.symbols[arg]), - .errPos = data.state.positions[pos]}); - - return data.FsCtx.record(new Formals(std::move(result))); -} - -static Expr *stripIndentation( - ParseData &data, const PosIdx pos, SymbolTable &symbols, - std::vector>> &&es) { - if (es.empty()) - return data.ctx.record(new ExprString("")); - - /* Figure out the minimum indentation. Note that by design - whitespace-only final lines are not taken into account. (So - the " " in "\n ''" is ignored, but the " " in "\n foo''" is.) */ - bool atStartOfLine = true; /* = seen only whitespace in the current line */ - size_t minIndent = 1000000; - size_t curIndent = 0; - for (auto &[i_pos, i] : es) { - auto *str = std::get_if(&i); - if (!str || !str->hasIndentation) { - /* Anti-quotations and escaped characters end the current start-of-line - * whitespace. */ - if (atStartOfLine) { - atStartOfLine = false; - if (curIndent < minIndent) - minIndent = curIndent; - } - continue; - } - for (size_t j = 0; j < str->l; ++j) { - if (atStartOfLine) { - if (str->p[j] == ' ') - curIndent++; - else if (str->p[j] == '\n') { - /* Empty line, doesn't influence minimum - indentation. */ - curIndent = 0; - } else { - atStartOfLine = false; - if (curIndent < minIndent) - minIndent = curIndent; - } - } else if (str->p[j] == '\n') { - atStartOfLine = true; - curIndent = 0; - } - } - } - - /* Strip spaces from each line. */ - auto *es2 = new std::vector>; - data.SPCtx.record(es2); - atStartOfLine = true; - size_t curDropped = 0; - size_t n = es.size(); - auto i = es.begin(); - const auto trimExpr = [&](Expr *e) { - atStartOfLine = false; - curDropped = 0; - es2->emplace_back(i->first, e); - }; - const auto trimString = [&](const StringToken &t) { - std::string s2; - for (size_t j = 0; j < t.l; ++j) { - if (atStartOfLine) { - if (t.p[j] == ' ') { - if (curDropped++ >= minIndent) - s2 += t.p[j]; - } else if (t.p[j] == '\n') { - curDropped = 0; - s2 += t.p[j]; - } else { - atStartOfLine = false; - curDropped = 0; - s2 += t.p[j]; - } - } else { - s2 += t.p[j]; - if (t.p[j] == '\n') - atStartOfLine = true; - } - } - - /* Remove the last line if it is empty and consists only of - spaces. */ - if (n == 1) { - std::string::size_type p = s2.find_last_of('\n'); - if (p != std::string::npos && - s2.find_first_not_of(' ', p + 1) == std::string::npos) - s2 = std::string(s2, 0, p + 1); - } - - es2->emplace_back(i->first, data.ctx.record(new ExprString(std::move(s2)))); - }; - for (; i != es.end(); ++i, --n) { - std::visit(nix::overloaded{trimExpr, trimString}, i->second); - } - - /* If this is a single string, then don't do a concatenation. */ - if (es2->size() == 1 && dynamic_cast((*es2)[0].second)) { - auto *const result = (*es2)[0].second; - return result; - } - return data.ctx.record(new ExprConcatStrings(pos, true, es2)); -} - -} // namespace nixd - -using namespace nixd; - -#define CUR_POS makeCurPos(*yylocp, data) - -void yyerror(YYLTYPE *loc, yyscan_t scanner, ParseData *data, - const char *error) { - data->error.push_back( - {.msg = hintfmt(error), - .errPos = data->state.positions[makeCurPos(*loc, data)]}); -} - -template T *M(nixd::ParseData *data, T *node) { - return data->ctx.addNode(std::unique_ptr(node)); -} diff --git a/nixd/include/nixd/Parser/Provides.h b/nixd/include/nixd/Parser/Provides.h deleted file mode 100644 index 748a76a5d..000000000 --- a/nixd/include/nixd/Parser/Provides.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include "Parser.tab.h" - -#include - -namespace nixd { - -static inline nix::PosIdx makeCurPos(const YYLTYPE &loc, ParseData *data) { - auto Res = - data->state.positions.add(data->origin, loc.first_line, loc.first_column); - data->end[Res] = - data->state.positions.add(data->origin, loc.last_line, loc.last_column); - return Res; -} - -} // namespace nixd diff --git a/nixd/include/nixd/Parser/Require.h b/nixd/include/nixd/Parser/Require.h deleted file mode 100644 index 6f2c72e79..000000000 --- a/nixd/include/nixd/Parser/Require.h +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once - -#include "nixd/Expr/Expr.h" -#include "nixd/Expr/Nodes.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -// using C a struct allows us to avoid having to define the special -// members that using string_view here would implicitly delete. -struct StringToken { - const char *p; - size_t l; - bool hasIndentation; - operator std::string_view() const { return {p, l}; } -}; - -namespace nixd { - -struct ParserFormals { - std::vector formals; - bool ellipsis = false; -}; -struct ParseState { - nix::SymbolTable &symbols; - nix::PosTable &positions; -}; - -struct ParseData { - using IndStringParts = std::vector< - std::pair>>; - using StringParts = std::vector>; - using AttrNames = std::vector; - - std::unique_ptr STable; - std::unique_ptr PTable; - - ParseState state; - nix::Expr *result; - nix::SourcePath basePath; - nix::PosTable::Origin origin; - std::vector error; - std::map end; - std::map locations; - - ASTContext ctx; - - Context PFCtx; - Context FCtx; - Context FsCtx; - Context APCtx; - - Context ANCtx; - Context SPCtx; - Context ISPCtx; -}; - -} // namespace nixd - -#define YY_DECL \ - int yylex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner, \ - nixd::ParseData *data) diff --git a/nixd/include/nixd/Lexer/Prologue.h b/nixd/include/nixd/Syntax/Lexer/Prologue.cpp similarity index 93% rename from nixd/include/nixd/Lexer/Prologue.h rename to nixd/include/nixd/Syntax/Lexer/Prologue.cpp index 7bfc2d049..37d42c873 100644 --- a/nixd/include/nixd/Lexer/Prologue.h +++ b/nixd/include/nixd/Syntax/Lexer/Prologue.cpp @@ -8,8 +8,6 @@ #include "Parser.tab.h" -#include "nixd/Parser/Provides.h" - #include using namespace nix; @@ -48,7 +46,8 @@ static void adjustLoc(YYLTYPE *loc, const char *s, size_t len) { // we make use of the fact that the parser receives a private copy of the input // string and can munge around in it. -static StringToken unescapeStr(SymbolTable &symbols, char *s, size_t length) { +static nixd::syntax::StringToken unescapeStr(SymbolTable &symbols, char *s, + size_t length) { char *result = s; char *t = s; char c; diff --git a/nixd/include/nixd/Syntax/Nodes.h b/nixd/include/nixd/Syntax/Nodes.h new file mode 100644 index 000000000..ce1847357 --- /dev/null +++ b/nixd/include/nixd/Syntax/Nodes.h @@ -0,0 +1,64 @@ +/// FIXME: comment for this file. +#pragma once + +#include + +namespace nixd::syntax { + +/// Syntax nodes +/// TODO: the comment +struct Node { + nix::PosIdx Start; + nix::PosIdx End; +}; + +struct Function : Node {}; + +struct Identifier : Node {}; + +struct If : Node {}; + +struct ExprOp : Node {}; + +struct UnaryOp : ExprOp {}; + +struct BinaryOp : ExprOp {}; + +struct Apply : Node {}; + +struct Select : Node {}; + +struct Simple : Node {}; + +struct Variable : Node { + Identifier ID; +}; + +struct Int : Node {}; + +struct Float : Node {}; + +struct StringParts : Node {}; + +struct StringPartsInterpolated : Node {}; + +struct List : Node { + std::vector