Skip to content

Commit

Permalink
Merge pull request #16 from tenzir/topic/tql2
Browse files Browse the repository at this point in the history
Upgrade grammar to TQL2
  • Loading branch information
jachris authored Aug 1, 2024
2 parents ec39aa6 + 15ca4c0 commit 2c49c69
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 115 deletions.
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
"version": "0.3.2",
"description": "Tenzir Query Language (TQL) support for CodeMirror",
"scripts": {
"build": "lezer-generator src/syntax.grammar -o src/parser && rollup -c",
"build-debug": "lezer-generator src/syntax.grammar --names -o src/parser && rollup -c",
"build": "lezer-generator src/tql.grammar -o src/parser && rollup -c",
"build-debug": "lezer-generator src/tql.grammar --names -o src/parser && rollup -c",
"test": "mocha test/test.js"
},
"type": "module",
Expand Down
22 changes: 7 additions & 15 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
import { parser } from "./syntax.grammar";
import { parser } from "./tql.grammar";
import { LRLanguage, LanguageSupport } from "@codemirror/language";
import { styleTags, tags as t } from "@lezer/highlight";
import { completeFromList } from "@codemirror/autocomplete";
import { completeFromList, Completion } from "@codemirror/autocomplete";
import { data } from "../output.js";

export const TenzirQueryLang = LRLanguage.define({
parser: parser.configure({
props: [
styleTags({
"Null Bool Number Ip String Time": t.literal,
"OperatorName!": t.name,
Punct: t.punctuation,
Type: t.typeName,
Pipe: t.separator,
"Scalar true false null DollarIdent": t.literal,
"String": t.string,
"StringEsc and else if in let match meta not or this": t.keyword,
"OpName! FnIdent": t.name,
"+ - \"*\" \"/\" = . ' : \"!\" < > \"?\" \"|\"": t.punctuation,
"LineComment BlockComment": t.comment,
Meta: t.meta,
}),
],
// TODO: add folding later
Expand All @@ -29,13 +28,6 @@ type GeneretedCompletion = {
docLink: string;
};

type Completion = {
label: string;
type: string;
detail: string;
info: () => Node;
};

const getCompletion = (completion: GeneretedCompletion): Completion => {
return {
label: completion.label,
Expand Down
86 changes: 0 additions & 86 deletions src/syntax.grammar

This file was deleted.

100 changes: 100 additions & 0 deletions src/tokens.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import { ExternalTokenizer, ContextTracker, Stack } from "@lezer/lr"
import {
ignoredNewline,
newline,
LParen,
RParen,
LBrace,
RBrace,
LBracket,
RBracket,
Comma,
Ident,
FnIdent,
DollarIdent
} from "./parser.terms.js"

// Per-nesting-level parser state consulted by the `newlines` tokenizer.
type ContextData = {
  ignoreNewlines: boolean // true inside ( … ) / [ … ], where "\n" is insignificant
  justHadComma: boolean   // true when the previously shifted token was a comma
}

/**
 * Immutable linked stack of `ContextData` frames used by the grammar's
 * ContextTracker. Opening brackets push a frame, closing brackets pop to
 * `parent`. The root frame is its own parent, so popping past the root
 * is harmless.
 */
class MyContext {
  // Enclosing frame; the root frame points at itself.
  readonly parent: MyContext
  readonly data: ContextData

  constructor(parent: MyContext | null, data: ContextData) {
    this.parent = parent ?? this
    this.data = data
  }

  /** Push a child frame whose `ignoreNewlines` flag is `value`. */
  public ignoreNewlines(value: boolean): MyContext {
    return new MyContext(this, { ...this.data, ignoreNewlines: value })
  }

  /** Replace the current frame (same parent) with `justHadComma` set to `value`. */
  public justHadComma(value: boolean): MyContext {
    return new MyContext(this.parent, { ...this.data, justHadComma: value })
  }
}

// Root frame: at the top level newlines separate statements.
const startContext = new MyContext(null, { ignoreNewlines: false, justHadComma: false })

/**
 * Tracks bracket nesting so the external `newlines` tokenizer can decide
 * whether a "\n" is a statement separator or ignorable:
 *   - ( … ) and [ … ] push a frame that ignores newlines,
 *   - { … } pushes a frame where newlines are significant again
 *     (records / nested pipelines),
 *   - a newline directly after a comma is always ignored.
 */
export const context = new ContextTracker({
  start: startContext,
  reduce(context, term, stack, input) {
    // Nesting only changes on shifted bracket tokens, never on reductions.
    return context
  },
  shift(context, term, stack, input) {
    // Remember whether this token is a comma so a following "\n" can be skipped.
    context = context.justHadComma(term === Comma)
    if (term === LParen || term === LBracket) {
      return context.ignoreNewlines(true)
    }
    if (term === LBrace) {
      return context.ignoreNewlines(false)
    }
    if (term === RParen || term === RBrace || term === RBracket) {
      return context.parent
    }
    return context
  }
})

// Shorthand for the UTF-16 code unit of a one-character string.
// (Returns NaN for the empty string, matching String.prototype.charCodeAt.)
const code = (x: string): number => x.charCodeAt(0)

/**
 * Context-sensitive newline tokenizer: emits `newline` (statement
 * separator) or `ignoredNewline` depending on the current bracket
 * context, so "\n" is skipped inside ( … ) / [ … ] and right after a
 * comma.
 */
export const newlines = new ExternalTokenizer((input, stack) => {
  const ctx = stack.context.data;
  if (input.next === code("\n")) {
    const ignore = ctx.ignoreNewlines || ctx.justHadComma
    input.acceptToken(ignore ? ignoredNewline : newline, 1)
  }
}, { contextual: true })

/**
 * Scans identifier-like tokens: `Ident` ([A-Za-z_][A-Za-z0-9_]*),
 * `DollarIdent` ("$" followed by identifier characters), and `FnIdent`
 * (an identifier whose very next character is "(").
 * NOTE(review): a "$name(" sequence is also classified as FnIdent —
 * confirm this is intended rather than DollarIdent.
 */
export const identifiers = new ExternalTokenizer((input, stack) => {
  // Character-class predicates over UTF-16 code units.
  const isStart = (ch: number): boolean =>
    (ch >= 0x61 && ch <= 0x7a) || // a-z
    (ch >= 0x41 && ch <= 0x5a) || // A-Z
    ch === 0x5f                   // _
  const isPart = (ch: number): boolean =>
    isStart(ch) || (ch >= 0x30 && ch <= 0x39) // 0-9
  let term = Ident
  if (!isStart(input.peek(0))) {
    // Not an identifier start: only "$" may begin a DollarIdent.
    if (input.peek(0) !== 0x24) return // "$"
    term = DollarIdent
  }
  // First character accepted implicitly; scan the remaining characters.
  let length = 1
  while (isPart(input.peek(length))) length += 1
  // A directly following "(" re-classifies the token as a function name.
  if (input.peek(length) === 0x28) term = FnIdent // "("
  input.acceptToken(term, length)
})
135 changes: 135 additions & 0 deletions src/tql.grammar
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Parser context (bracket nesting / comma state) lives in tokens.ts.
@context context from "./tokens.ts"

// Entry point: a pipeline is an optional sequence of statements.
@top Pipeline { _Statements? }

// Statements separated by newlines and/or pipes; leading newlines and an
// optional leading pipe are tolerated, and a trailing separator may omit
// the statement.
_Statements {
newline* (Pipe newline*)? _Stmt ((newline | Pipe) _Stmt?)*
}

// A statement is an operator invocation, an assignment, or one of the
// `let` / `if` / `match` forms followed by unparsed token "soup".
_Stmt {
OpStmt |
AssignStmt |
(kw<"let"> | kw<"if"> | kw<"match">) Soup }

// Operator invocation: an operator name followed by its raw arguments.
// ~op_name / ~op_name2 are ambiguity markers shared with OpName/UnExpr.
OpStmt { OpName ~op_name ~op_name2 Soup }

// Higher dynamic precedence so `x = …` wins over an operator statement
// when both parses are possible.
AssignStmt[@dynamicPrecedence=1] {
UnExpr ~op_name2 "=" Soup
}

OpName { Ident ~op_name } // TODO: Why do we need this here?
// Entity { Ident ("'" Ident)* }

// TODO: Expand this.
// Left-hand side of an assignment: an identifier with an optional dotted path.
UnExpr { Ident ~op_name2 ("." (Ident | ".")*)? }

// "Soup": a flat run of tokens; arguments are not structurally parsed yet.
Soup { _Any* }

// Anything but { } | and newline.
_Any {
FnIdent |
Ident |
DollarIdent |
Scalar |
_Punct |
_Keyword |
RecordStart RecordRest |
PipeExpr |
String
}

// A record literal is recognized by its `{ ident :` prefix; the higher
// dynamic precedence prefers this reading over a nested pipeline.
RecordStart[@dynamicPrecedence=2] {
"{" newline* Ident ~op_name ":"
}

// Remainder of a record literal up to the closing brace; newlines and
// pipes are allowed inside.
RecordRest {
(_Any | newline | Pipe)* "}"
}

// A nested pipeline in braces.
PipeExpr {
"{" _Statements "}"
}

// Keywords (specializations of Ident) that may appear inside Soup.
_Keyword {
kw<"and"> |
kw<"else"> |
kw<"false"> |
kw<"if"> |
kw<"in"> |
kw<"let"> |
kw<"match"> |
kw<"meta"> |
kw<"not"> |
kw<"null"> |
kw<"or"> |
kw<"this"> |
kw<"true">
}

// Single-character punctuation usable inside Soup.
_Punct {
"+" |
"-" |
"*" |
"/" |
"," |
"=" |
"." |
"'" |
":" |
"!" |
"?" |
"<" |
">" |
"[" |
"]" |
"(" |
")"
}

Pipe { "|" }

@tokens {
// A backslash-newline continues the line, so it counts as plain space.
space { ($[ \t\f] | "\\" $[\n\r])+ }
LineComment { "//" ![\n\r]* }
// Exported so the context tracker in tokens.ts can test for them.
","[@export=Comma]
"("[@export=LParen] ")"[@export=RParen]
"{"[@export=LBrace] "}"[@export=RBrace]
"["[@export=LBracket] "]"[@export=RBracket]
// Digits with optional "."/"_" separators and an alphanumeric tail
// (presumably unit/type suffixes — NOTE(review): confirm intended shapes).
Scalar { $[0-9]($[0-9] | "." | "_")* $[a-zA-Z0-9_]* }
// "//" must win over a lone "/".
@precedence { LineComment "/" }
"+" "-" "*" "/" "," "=" "." "'" ":" "!" "?" "<" ">"
stringContent { ![\\\"]+ }
StringEsc { "\\" _ } // TODO: Complete it.
}


// Strings are scanned with skipping disabled so whitespace is preserved.
@skip {} {
String {
"\"" (stringContent | StringEsc)* "\""
}
}


@skip { space | ignoredNewline | LineComment | BlockComment }

// Block comments likewise suppress skipping for their interior.
@skip {} {
BlockComment { "/*" blockCommentContent* blockCommentEnd }
}

@local tokens {
blockCommentEnd { "*/" }
@else blockCommentContent
}

// "\n" becomes `newline` or `ignoredNewline` depending on the tracked
// bracket context (see tokens.ts).
@external tokens newlines from "./tokens.ts" {
newline,
ignoredNewline
}

@external tokens identifiers from "./tokens.ts" {
Ident,
FnIdent,
DollarIdent
}

// Keyword specialization of Ident, with the node named after the keyword.
kw<term> { @specialize[@name={term}]<Ident, term> }
File renamed without changes.
Loading

0 comments on commit 2c49c69

Please sign in to comment.