From e0e3ec5f37f9cf69826f3a96c815d944d62efec1 Mon Sep 17 00:00:00 2001
From: Markus Rudolph <markus.rudolph@typefox.io>
Date: Wed, 20 Mar 2024 10:15:37 +0100
Subject: [PATCH] WIP: migrate playground Langium highlighting from Monarch to TextMate

---
 hugo/content/playground/common.ts            |  10 +-
 hugo/content/playground/data.ts              | 349 +++++++++++++++----
 hugo/content/playground/monarch-generator.ts |  34 +-
 hugo/package.json                            |   2 +-
 package-lock.json                            |   2 +-
 5 files changed, 300 insertions(+), 97 deletions(-)

diff --git a/hugo/content/playground/common.ts b/hugo/content/playground/common.ts
index df5d166d..df1ebc94 100644
--- a/hugo/content/playground/common.ts
+++ b/hugo/content/playground/common.ts
@@ -6,7 +6,7 @@
 
 import {
   HelloWorldGrammar,
-  LangiumMonarchContent,
+  LangiumTextMateContent,
   DSLInitialContent,
 } from "./data.js";
 import { generateMonarch } from "./monarch-generator.js";
@@ -158,7 +158,7 @@ export async function setupPlayground(
           await setupDSLWrapper();
           overlay(false, false);
   
-        }).catch(async (e) => {
+        }).catch(async (e: any) => {
           // failed to dispose, report & discard this error
           // can happen when a previous editor was not started correctly
           console.error('DSL editor disposal error: ' + e);
@@ -224,7 +224,7 @@ async function getFreshDSLWrapper(
     monarchGrammar: generateMonarch(Grammar, languageId)
   }), htmlElement).then(() => {
     return wrapper;
-  }).catch(async (e) => {
+  }).catch(async (e: any) => {
     console.error('Failed to start DSL wrapper: ' + e);
     // don't leak the worker on failure to start
     // normally we wouldn't need to manually terminate, but if the LC is stuck in the 'starting' state, the following dispose will fail prematurely
@@ -235,7 +235,7 @@ async function getFreshDSLWrapper(
     try {
       await wrapper.dispose();
     } catch (e) {}
-    return undefined;
+    return undefined as MonacoEditorLanguageClientWrapper|undefined;
   });
 }
 
@@ -252,7 +252,7 @@ async function getFreshLangiumWrapper(htmlElement: HTMLElement): Promise<MonacoE
     languageId: "langium",
     code: currentGrammarContent,
     worker: "./libs/worker/langiumServerWorker.js",
-    monarchGrammar: LangiumMonarchContent
+    textmateGrammar: LangiumTextMateContent
   }), htmlElement);
   return langiumWrapper;
 }
diff --git a/hugo/content/playground/data.ts b/hugo/content/playground/data.ts
index e3f82572..65c162b7 100644
--- a/hugo/content/playground/data.ts
+++ b/hugo/content/playground/data.ts
@@ -4,92 +4,295 @@
  * terms of the MIT License, which is available in the project root.
  ******************************************************************************/
 
-export const LangiumMonarchContent = {
-  keywords: [
-    "bigint",
-    "boolean",
-    "current",
-    "Date",
-    "entry",
-    "extends",
-    "false",
-    "fragment",
-    "grammar",
-    "hidden",
-    "import",
-    "infer",
-    "infers",
-    "interface",
-    "number",
-    "returns",
-    "string",
-    "terminal",
-    "true",
-    "type",
-    "with",
+export const LangiumTextMateContent = {
+  "name": "Langium",
+  "scopeName": "source.langium",
+  "fileTypes": [
+      "langium"
   ],
-  operators: [
-    "->",
-    ",",
-    ";",
-    ":",
-    "!",
-    "?",
-    "?=",
-    ".",
-    "..",
-    "@",
-    "*",
-    "&",
-    "+",
-    "+=",
-    "<",
-    "=",
-    "=>",
-    ">",
-    "|",
+  "patterns": [
+      {
+          "include": "#regex"
+      },
+      {
+          "include": "#comments"
+      },
+      {
+          "name": "keyword.control.langium",
+          "match": "\\b(current|entry|extends|fragment|grammar|hidden|import|infer|infers|interface|returns|terminal|type|with)\\b"
+      },
+      {
+          "name": "constant.language.langium",
+          "match": "\\b(?i:true|false)\\b"
+      },
+      {
+          "name": "keyword.symbol.langium",
+          "match": "(\\{|\\}|\\:|\\]|\\[|\\(|\\)|(\\??|\\+?)\\=|->|\\=>|<|>|\\,|\\*|\\+|\\@|\\||\\&|\\?|\\!|\\;)"
+      },
+      {
+          "name": "string.quoted.double.langium",
+          "begin": "\"",
+          "end": "\"",
+          "patterns": [
+      {
+        "include": "#string-character-escape"
+      }
+    ]
+      },
+      {
+          "name": "string.quoted.single.langium",
+          "begin": "'",
+          "end": "'",
+          "patterns": [
+      {
+        "include": "#string-character-escape"
+      }
+    ]
+      }
   ],
-  symbols:
-    /->|,|;|:|!|\?|\?=|\.|\.\.|\(|\)|\[|\[\]|\]|\{|\}|@|\*|&|\+|\+=|<|=|=>|>|\|/,
-
-  tokenizer: {
-    initial: [
+  "repository": {
+      "comments": {
+          "patterns": [
+              {
+                  "name": "comment.block.langium",
+                  "begin": "/\\*",
+                  "beginCaptures": {
+                      "0": {
+                          "name": "punctuation.definition.comment.langium"
+                      }
+                  },
+                  "end": "\\*/",
+                  "endCaptures": {
+                      "0": {
+                          "name": "punctuation.definition.comment.langium"
+                      }
+                  }
+              },
+              {
+                  "begin": "(^\\s+)?(?=//)",
+                  "beginCaptures": {
+                      "1": {
+                          "name": "punctuation.whitespace.comment.leading.langium"
+                      }
+                  },
+                  "end": "(?=$)",
+                  "name": "comment.line.langium"
+              }
+          ]
+      },
+      "string-character-escape": {
+          "name": "constant.character.escape.langium",
+          "match": "\\\\(x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|u\\{[0-9A-Fa-f]+\\}|[0-2][0-7]{0,2}|3[0-6][0-7]?|37[0-7]?|[4-7][0-7]?|.|$)"
+      },
+      "regex": {
+    "patterns": [
+      {
+        "name": "string.regexp.ts",
+        "begin": "(?<!\\+\\+|--|})(?<=[=(:,\\[?+!]|^return|[^\\._$[:alnum:]]return|^case|[^\\._$[:alnum:]]case|=>|&&|\\|\\||\\*\\/)\\s*(\\/)(?![\\/*])(?=(?:[^\\/\\\\\\[\\()]|\\\\.|\\[([^\\]\\\\]|\\\\.)+\\]|\\(([^\\)\\\\]|\\\\.)+\\))+\\/([a-z]+|(?![\\/\\*])|(?=\\/\\*))(?!\\s*[a-zA-Z0-9_$]))",
+        "beginCaptures": {
+          "1": {
+            "name": "punctuation.definition.string.begin.ts"
+          }
+        },
+        "end": "(/)([a-z]*)",
+        "endCaptures": {
+          "1": {
+            "name": "punctuation.definition.string.end.ts"
+          },
+          "2": {
+            "name": "keyword.other.ts"
+          }
+        },
+        "patterns": [
+          {
+            "include": "#regexp"
+          }
+        ]
+      },
+      {
+        "name": "string.regexp.ts",
+        "begin": "((?<![_$[:alnum:])\\]]|\\+\\+|--|}|\\*\\/)|((?<=^return|[^\\._$[:alnum:]]return|^case|[^\\._$[:alnum:]]case))\\s*)\\/(?![\\/*])(?=(?:[^\\/\\\\\\[]|\\\\.|\\[([^\\]\\\\]|\\\\.)*\\])+\\/([a-z]+|(?![\\/\\*])|(?=\\/\\*))(?!\\s*[a-zA-Z0-9_$]))",
+        "beginCaptures": {
+          "0": {
+            "name": "punctuation.definition.string.begin.ts"
+          }
+        },
+        "end": "(/)([a-z]*)",
+        "endCaptures": {
+          "1": {
+            "name": "punctuation.definition.string.end.ts"
+          },
+          "2": {
+            "name": "keyword.other.ts"
+          }
+        },
+        "patterns": [
+          {
+            "include": "#regexp"
+          }
+        ]
+      }
+    ]
+  },
+  "regexp": {
+    "patterns": [
       {
-        regex: /\/(?![*+?])(?:[^\r\n\[/\\]|\\.|\[(?:[^\r\n\]\\]|\\.)*\])+\//,
-        action: { token: "string" },
+        "name": "keyword.control.anchor.regexp",
+        "match": "\\\\[bB]|\\^|\\$"
       },
       {
-        regex: /\^?[_a-zA-Z][\w_]*/,
-        action: {
-          cases: {
-            "@keywords": { token: "keyword" },
-            "@default": { token: "ID" },
+        "match": "\\\\[1-9]\\d*|\\\\k<([a-zA-Z_$][\\w$]*)>",
+        "captures": {
+          "0": {
+            "name": "keyword.other.back-reference.regexp"
           },
+          "1": {
+            "name": "variable.other.regexp"
+          }
+        }
+      },
+      {
+        "name": "keyword.operator.quantifier.regexp",
+        "match": "[?+*]|\\{(\\d+,\\d+|\\d+,|,\\d+|\\d+)\\}\\??"
+      },
+      {
+        "name": "keyword.operator.or.regexp",
+        "match": "\\|"
+      },
+      {
+        "name": "meta.group.assertion.regexp",
+        "begin": "(\\()((\\?=)|(\\?!)|(\\?<=)|(\\?<!))",
+        "beginCaptures": {
+          "1": {
+            "name": "punctuation.definition.group.regexp"
+          },
+          "2": {
+            "name": "punctuation.definition.group.assertion.regexp"
+          },
+          "3": {
+            "name": "meta.assertion.look-ahead.regexp"
+          },
+          "4": {
+            "name": "meta.assertion.negative-look-ahead.regexp"
+          },
+          "5": {
+            "name": "meta.assertion.look-behind.regexp"
+          },
+          "6": {
+            "name": "meta.assertion.negative-look-behind.regexp"
+          }
+        },
+        "end": "(\\))",
+        "endCaptures": {
+          "1": {
+            "name": "punctuation.definition.group.regexp"
+          }
+        },
+        "patterns": [
+          {
+            "include": "#regexp"
+          }
+        ]
+      },
+      {
+        "name": "meta.group.regexp",
+        "begin": "\\((?:(\\?:)|(?:\\?<([a-zA-Z_$][\\w$]*)>))?",
+        "beginCaptures": {
+          "0": {
+            "name": "punctuation.definition.group.regexp"
+          },
+          "1": {
+            "name": "punctuation.definition.group.no-capture.regexp"
+          },
+          "2": {
+            "name": "variable.other.regexp"
+          }
+        },
+        "end": "\\)",
+        "endCaptures": {
+          "0": {
+            "name": "punctuation.definition.group.regexp"
+          }
         },
+        "patterns": [
+          {
+            "include": "#regexp"
+          }
+        ]
       },
-      { regex: /"[^"]*"|'[^']*'/, action: { token: "string" } },
-      { include: "@whitespace" },
       {
-        regex: /@symbols/,
-        action: {
-          cases: {
-            "@operators": { token: "operator" },
-            "@default": { token: "" },
+        "name": "constant.other.character-class.set.regexp",
+        "begin": "(\\[)(\\^)?",
+        "beginCaptures": {
+          "1": {
+            "name": "punctuation.definition.character-class.regexp"
           },
+          "2": {
+            "name": "keyword.operator.negation.regexp"
+          }
         },
+        "end": "(\\])",
+        "endCaptures": {
+          "1": {
+            "name": "punctuation.definition.character-class.regexp"
+          }
+        },
+        "patterns": [
+          {
+            "name": "constant.other.character-class.range.regexp",
+            "match": "(?:.|(\\\\(?:[0-7]{3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}))|(\\\\c[A-Z])|(\\\\.))\\-(?:[^\\]\\\\]|(\\\\(?:[0-7]{3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}))|(\\\\c[A-Z])|(\\\\.))",
+            "captures": {
+              "1": {
+                "name": "constant.character.numeric.regexp"
+              },
+              "2": {
+                "name": "constant.character.control.regexp"
+              },
+              "3": {
+                "name": "constant.character.escape.backslash.regexp"
+              },
+              "4": {
+                "name": "constant.character.numeric.regexp"
+              },
+              "5": {
+                "name": "constant.character.control.regexp"
+              },
+              "6": {
+                "name": "constant.character.escape.backslash.regexp"
+              }
+            }
+          },
+          {
+            "include": "#regex-character-class"
+          }
+        ]
       },
-    ],
-    whitespace: [
-      { regex: /\s+/, action: { token: "white" } },
-      { regex: /\/\*/, action: { token: "comment", next: "@comment" } },
-      { regex: /\/\/[^\n\r]*/, action: { token: "comment" } },
-    ],
-    comment: [
-      { regex: /[^\/\*]+/, action: { token: "comment" } },
-      { regex: /\*\//, action: { token: "comment", next: "@pop" } },
-      { regex: /[\/\*]/, action: { token: "comment" } },
-    ],
+      {
+        "include": "#regex-character-class"
+      }
+    ]
   },
+  "regex-character-class": {
+    "patterns": [
+      {
+        "name": "constant.other.character-class.regexp",
+        "match": "\\\\[wWsSdDtrnvf]|\\."
+      },
+      {
+        "name": "constant.character.numeric.regexp",
+        "match": "\\\\([0-7]{3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4})"
+      },
+      {
+        "name": "constant.character.control.regexp",
+        "match": "\\\\c[A-Z]"
+      },
+      {
+        "name": "constant.character.escape.backslash.regexp",
+        "match": "\\\\."
+      }
+    ]
+  }
+  }
 };
 
 export const HelloWorldGrammar = `grammar HelloWorld
diff --git a/hugo/content/playground/monarch-generator.ts b/hugo/content/playground/monarch-generator.ts
index e0c0c68d..f8ac8006 100644
--- a/hugo/content/playground/monarch-generator.ts
+++ b/hugo/content/playground/monarch-generator.ts
@@ -4,9 +4,7 @@
  * terms of the MIT License, which is available in the project root.
  ******************************************************************************/
 
-import { GrammarUtils, stream, RegExpUtils } from "langium";
-import { GrammarAST } from "langium";
-import { languages } from "monaco-editor";
+import { GrammarAST, GrammarUtils, RegExpUtils, stream } from "langium";
 
 /**
  * Monarch Language Definition, describes aspects & token categories of target language
@@ -119,7 +117,7 @@ interface MonarchGrammar {
  * @param config Langium Config to also use during generation
  * @returns Generated Monarch syntax highlighting file content
  */
-export function generateMonarch(grammar: GrammarAST.Grammar, id: string): languages.IMonarchLanguage {
+export function generateMonarch(grammar: GrammarAST.Grammar, id: string) {
   const symbols = getSymbols(grammar);
   const regex = /[{}[\]()]/;
   const operators = symbols.filter((s) => !regex.test(s));
@@ -194,14 +192,14 @@ function getTokenizerStates(grammar: GrammarAST.Grammar): State[] {
  * @param monarchGrammar Grammar to pretty print
  * @returns Monarch grammar in concrete form
  */
-function prettyPrint(monarchGrammar: MonarchGrammar): languages.IMonarchLanguage {
+function prettyPrint(monarchGrammar: MonarchGrammar) {
   const name = monarchGrammar.languageDefinition.name;
 
   const languages = prettyPrintLangDef(monarchGrammar.languageDefinition);
   const tokenizer = prettyPrintTokenizer(monarchGrammar.tokenizer);
   return {
     ...languages,
-    tokenizer
+    ...tokenizer
   };
 }
 
@@ -236,20 +234,20 @@ function prettyPrintLangDef(
  * Pretty prints the tokenizer portion of a Monarch grammar file
  * @param tokenizer Tokenizer portion to print out
  */
-function prettyPrintTokenizer(tokenizer: Tokenizer): {
-  [name: string]: languages.IMonarchLanguageRule[];
-} {
+function prettyPrintTokenizer(tokenizer: Tokenizer) {
   const result = tokenizer.states
     .map((s) => prettyPrintState(s))
     .reduce((lhs, rhs) => ({ ...lhs, ...rhs }), {});
-  return result;
+  return {
+    tokenizer: result,
+  };
 }
 
 /**
  * Pretty prints a tokenizer state, composed of various rules
  * @param state Tokenizer state to pretty print
  */
-function prettyPrintState(state: State): {[name: string]: languages.IMonarchLanguageRule[]} {
+function prettyPrintState(state: State) {
   return {
     [state.name]: state.rules.map((r) => prettyPrintRule(r)),
   };
@@ -261,13 +259,15 @@ function prettyPrintState(state: State): {[name: string]: languages.IMonarchLang
  * @param ruleOrState Rule to pretty print. If it's a state, we include that state's contents implicitly within this context.
  * @returns Generator node containing this printed rule
  */
-function prettyPrintRule(ruleOrState: Rule | State): languages.IMonarchLanguageRule {
+function prettyPrintRule(ruleOrState: Rule | State): Rule {
   if (isRegexRule(ruleOrState)) {
-    const regex = ruleOrState.regex instanceof RegExp
-      ? ruleOrState.regex
-      : new RegExp(ruleOrState.regex);
-    const action = prettyPrintAction(ruleOrState.action);
-    return [regex, action] as const;
+    return {
+      regex:
+        ruleOrState.regex instanceof RegExp
+          ? ruleOrState.regex
+          : new RegExp(ruleOrState.regex),
+      action: prettyPrintAction(ruleOrState.action),
+    };
   } else if (isIncludeRule(ruleOrState)) {
     return ruleOrState;
   } else {
diff --git a/hugo/package.json b/hugo/package.json
index 358ea7a8..7b46358e 100644
--- a/hugo/package.json
+++ b/hugo/package.json
@@ -48,7 +48,7 @@
     "langium-statemachine-dsl": "^3.0.0",
     "langium-website-core": "~1.0.0",
     "lz-string": "^1.4.4",
-    "monaco-editor-workers": "~0.45.0",
+    "monaco-editor-workers": "~0.44.0",
     "react": "~18.2.0",
     "react-dom": "~18.2.0",
     "vscode-languageserver": "~9.0.1"
diff --git a/package-lock.json b/package-lock.json
index 0f548b3d..4d451611 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -211,7 +211,7 @@
         "langium-statemachine-dsl": "^3.0.0",
         "langium-website-core": "~1.0.0",
         "lz-string": "^1.4.4",
-        "monaco-editor-workers": "~0.45.0",
+        "monaco-editor-workers": "~0.44.0",
         "react": "~18.2.0",
         "react-dom": "~18.2.0",
         "vscode-languageserver": "~9.0.1"