Merge pull request #38 from ndy2/refactor-get-exclude-indinces

support more precise escape for code blocks
ndy2 · Mar 8, 2024 · daa5189 · daa5189
2 parents 695afda + f4dc9b3
commit daa5189
Show file tree

Hide file tree

Showing 9 changed files with 328 additions and 156 deletions.
diff --git a/docs/features/admonition/backquotes.md b/docs/features/admonition/backquotes.md
@@ -13,32 +13,38 @@ comments: true
 
 ###  Admonition Backquotes
 
-=== "obsidian markdown"
-~~~
+
+~~~tabs
+---tab obsidian markdown
+````
 ```ad-tip
 title: This is a tip
 
 This is the content of the admonition tip.
 ```
+````
+---tab obsidian rendered
+![[images/backquotes_1.png]]
 ~~~
 
-=== "obsidian rendered"
-![[images/backquotes_1.png]]
 
-### mkdocs-material admonition
 
-=== "mkdocs-material markdown"
 
+
+### mkdocs-material admonition
+
+~~~tabs
+---tab mkdocs-material markdown
 ```
 !!!tip "This is a tip"
 
     This is the content of the admonition tip.
 ```
 
-=== "mkdocs-material rendered"
-
+---tab mkdocs-material rendered
 ```ad-tip
 title: This is a tip
 
 This is the content of the admonition tip.
-```
+```
+~~~
diff --git a/docs/features/comment/index.md b/docs/features/comment/index.md
@@ -12,41 +12,39 @@ comments: true
 ## Usage
 
 ### obsidian comment
-
-=== "obsidian markdown"
-
-```
+```tabs
+---tab obsidian markdown
+~~~
 This is an %%inline%% comment.
 
 %% 
 This is a block comment. 
 Block comments can span multiple lines. 
 %%
-```
-
-=== "obsidian reading view"
+~~~
 
+---tab obsidian reading view
 ![[images/comment_1.png]]
+```
 
-### mkdocs-material comment
 
-=== "mkdocs-material markdown"
+### mkdocs-material comment
 
-```
+```tabs
+---tab mkdocs-material markdown
+~~~
 This is an <!--inline--> comment.
 
 <!--
 This is a block comment. 
 Block comments can span multiple lines. 
 -->
-```
-
-=== "mkdocs-material rendered"
+~~~
 
+---tab mkdocs-material rendered
 This is an %%inline%% comment.
-
 %% 
 This is a block comment. 
 Block comments can span multiple lines. 
 %%
-
+```
diff --git a/docs/features/tabs/index.md b/docs/features/tabs/index.md
@@ -54,6 +54,8 @@ This tab contains callout
 
 ### mkdocs-material content tabs
 
+**mkdocs markdown**
+
 ~~~~
 === "First *tab*"
     
@@ -92,6 +94,8 @@ This tab contains callout
 ~~~~
 
 
+**rendered**
+
 ```tabs
 ---tab First *tab*
 This is a *sample* **tab** with some markdown :

diff --git a/obsidian_support/conversion/abstract_conversion.py b/obsidian_support/conversion/abstract_conversion.py
@@ -1,11 +1,11 @@
-import re
 from abc import *
-from enum import Enum
-from typing import List, Tuple
+from typing import List
 
 from mkdocs.structure.pages import Page
 from overrides import final
 
+from obsidian_support.conversion.util import is_overlapped, get_exclude_indices
+
 """
 An abstract class that implies a conversion from `obsidian syntax` to `mkdocs-material syntax`
 
@@ -20,32 +20,6 @@
 SyntaxGroup = List[str]
 
 
-class MarkdownPatterns(Enum):
-    # Exclude Patterns
-    """ regex that matches markdown `tilde code block` (triple tilde syntax)"""
-    TILDE_CODE_BLOCK = r"([A-Za-z \t]*)~~~([-A-Za-z]*)?\n([\s\S]*?)~~~([A-Za-z \t]*)*"
-
-    """regex that matches markdown `backtick code block` (triple backtick syntax)"""
-    BACKTICK_CODE_BLOCK = r"([A-Za-z \t]*)```([-A-Za-z]*)?\n([\s\S]*?)```([A-Za-z \t]*)*"
-
-    """regex that matches markdown code (single backtick syntax)"""
-    BACKTICK_CODE = r"`[\S\s]*?`"
-
-    # Do Not Exclude Patterns
-    """regex that matches markdown `admonition backquotes` (triple tilde syntax)"""
-    ADMONITION_BACKQUOTES_CODE_BLOCK = r"([A-Za-z \t]*)```ad-([-A-Za-z]*)?\n([\s\S]*?)```([A-Za-z \t]*)*"
-
-    """regex that matches markdown `tabs backquotes code block` (triple tilde syntax)"""
-    TABS_BACKQUOTES_CODE_BLOCK = r"([A-Za-z \t]*)```tabs([-A-Za-z]*)?\n([\s\S]*?)```([A-Za-z \t]*)*"
-
-    """regex that matches markdown `tabs tilde code block` (triple tilde syntax)"""
-    TABS_TILDE_CODE_BLOCK = r"([A-Za-z \t]*)~~~tabs([-A-Za-z]*)?\n([\s\S]*?)~~~([A-Za-z \t]*)*"
-
-    @property
-    def regex(self):
-        return self.value
-
-
 class AbstractConversion(metaclass=ABCMeta):
 
     @property
@@ -69,16 +43,15 @@ def convert(self, syntax_groups: SyntaxGroup, page: Page) -> str:
     def markdown_convert(self, markdown: str, page: Page) -> str:
         converted_markdown = ""
         index = 0
-        excluded_indices = self._get_excluded_indices(markdown)
-        excluded_indices = self._filter_do_not_exclude_indices(excluded_indices, markdown)
+        excluded_indices = get_exclude_indices(markdown)
 
         for obsidian_syntax in self.obsidian_regex_pattern.finditer(markdown):
-            ## found range of markdown where the obsidian_regex matches
+            # find range of markdown where the obsidian_regex matches
             start = obsidian_syntax.start()
             end = obsidian_syntax.end() - 1
 
-            ## continue if match is in excluded range
-            if self._is_overlapped(start, end, excluded_indices):
+            # continue if match is in excluded range
+            if is_overlapped(start, end, excluded_indices):
                 continue
 
             syntax_groups = list(map(lambda group: obsidian_syntax.group(group), self.obsidian_regex_groups))
@@ -90,46 +63,3 @@ def markdown_convert(self, markdown: str, page: Page) -> str:
 
         converted_markdown += markdown[index:len(markdown)]
         return converted_markdown
-
-    @staticmethod
-    def _get_excluded_indices(markdown: str) -> List[tuple]:
-        tilde_code_block_indices = []
-        for code in re.finditer(MarkdownPatterns.TILDE_CODE_BLOCK.regex, markdown):
-            tilde_code_block_indices.append((code.start(), code.end() - 1))
-
-        backtick_code_block_indices = []
-        for code in re.finditer(MarkdownPatterns.BACKTICK_CODE_BLOCK.regex, markdown):
-            if not AbstractConversion._is_overlapped(code.start(), code.end() - 1, tilde_code_block_indices):
-                backtick_code_block_indices.append((code.start(), code.end() - 1))
-
-        backtick_code_indices = []
-        for code in re.finditer(MarkdownPatterns.BACKTICK_CODE.regex, markdown):
-            if not AbstractConversion._is_overlapped(code.start(), code.end() - 1, tilde_code_block_indices) and \
-                    not AbstractConversion._is_overlapped(code.start(), code.end() - 1, backtick_code_block_indices):
-                backtick_code_indices.append((code.start(), code.end() - 1))
-
-        return tilde_code_block_indices + backtick_code_block_indices + backtick_code_indices
-
-    @staticmethod
-    def _filter_do_not_exclude_indices(exclude_indices: List[Tuple], markdown) -> List[Tuple]:
-        admonition_backquotes_code_block_indices = []
-        for code in re.finditer(MarkdownPatterns.ADMONITION_BACKQUOTES_CODE_BLOCK.regex, markdown):
-            admonition_backquotes_code_block_indices.append((code.start(), code.end() - 1))
-
-        tabs_backquotes_code_block_indices = []
-        for code in re.finditer(MarkdownPatterns.TABS_BACKQUOTES_CODE_BLOCK.regex, markdown):
-            tabs_backquotes_code_block_indices.append((code.start(), code.end() - 1))
-
-        tabs_tilde_code_block_indices = []
-        for code in re.finditer(MarkdownPatterns.TABS_TILDE_CODE_BLOCK.regex, markdown):
-            tabs_tilde_code_block_indices.append((code.start(), code.end() - 1))
-
-        do_not_exclude_indices = admonition_backquotes_code_block_indices + tabs_backquotes_code_block_indices + tabs_tilde_code_block_indices
-        return list(filter(lambda indices: indices not in do_not_exclude_indices, exclude_indices))
-
-    @staticmethod
-    def _is_overlapped(start: int, end: int, exclude_indices_pairs: List[tuple]) -> bool:
-        for exclude_indices_pair in exclude_indices_pairs:
-            if exclude_indices_pair[0] <= start and end <= exclude_indices_pair[1]:
-                return True
-        return False
diff --git a/obsidian_support/conversion/util.py b/obsidian_support/conversion/util.py
@@ -0,0 +1,79 @@
+import dataclasses
+import re
+from typing import Tuple, List
+
+CODE_BLOCK_PATTERN = re.compile(r'(?P<code_block>^[`~]{3,})(?P<language>[a-zA-Z\-]*)?', re.MULTILINE)
+BACKQUOTES_CODE_PATTERN = re.compile(r"`[^\n`]+`")
+
+
+@dataclasses.dataclass
+class CodeBlockSyntax:
+    start: int
+    end: int
+    code_block_type: str
+    language: str
+
+
+def get_exclude_indices(markdown: str) -> List[Tuple[int, int]]:
+    # setup
+    exclude_indices = []
+    code_block_matches = {}
+
+    # step 1
+    # find every line that starts with three or more ` or ~ characters
+    # and group the matches by the length of that fence
+    for code_block_match in CODE_BLOCK_PATTERN.finditer(markdown):
+        code_block_syntax = code_block_match.group("code_block")
+        size = len(code_block_syntax)
+        code_block_type = code_block_syntax[0]
+        start = code_block_match.start()
+        end = code_block_match.end()
+        language = code_block_match.group("language")
+
+        if size not in code_block_matches:
+            code_block_matches[size] = []
+        code_block_matches[size].append(CodeBlockSyntax(start, end, code_block_type, language))
+
+    # step 2
+    # iterate over code_block_matches in descending order of fence length
+    for code_block_size in sorted(code_block_matches.keys(), reverse=True):
+        code_block_matches_with_same_size = code_block_matches[code_block_size]
+        current_syntax = None
+        nested_code_block_syntax = None
+
+        # drop the matches that already lie inside an excluded range
+        code_block_matches_with_same_size = [it for it in code_block_matches_with_same_size if
+                                             not is_overlapped(it.start, it.end, exclude_indices)]
+        for code_block_syntax in code_block_matches_with_same_size:
+            if current_syntax is None:
+                current_syntax = code_block_syntax
+            elif current_syntax.code_block_type == code_block_syntax.code_block_type and \
+                    not is_overlapped(current_syntax.start, code_block_syntax.end, exclude_indices):
+                # do not exclude the range if it is an ad-* or tabs block
+                if not current_syntax.language.startswith("ad-") and not current_syntax.language == "tabs":
+                    exclude_indices.append((current_syntax.start, code_block_syntax.end))
+                current_syntax = None
+            elif (current_syntax.language.startswith("ad-") or current_syntax.language == "tabs") and \
+                    current_syntax.code_block_type != code_block_syntax.code_block_type and \
+                    nested_code_block_syntax is None:
+                nested_code_block_syntax = code_block_syntax
+            elif nested_code_block_syntax is not None and \
+                    code_block_syntax.code_block_type == nested_code_block_syntax.code_block_type:
+                if not nested_code_block_syntax.language.startswith("ad-") and not nested_code_block_syntax.language == "tabs":
+                    exclude_indices.append((nested_code_block_syntax.start, code_block_syntax.end))
+                nested_code_block_syntax = None
+
+    # step 3
+    # exclude inline code spans (`code`) that are not already inside an excluded range
+    for code_match in re.finditer(BACKQUOTES_CODE_PATTERN, markdown):
+        if not is_overlapped(code_match.start(), code_match.end(), exclude_indices):
+            exclude_indices.append((code_match.start(), code_match.end()))
+
+    return exclude_indices
+
+
+def is_overlapped(start: int, end: int, exclude_indices_pairs: List[tuple]) -> bool:
+    for exclude_indices_pair in exclude_indices_pairs:
+        if exclude_indices_pair[0] <= start and end <= exclude_indices_pair[1]:
+            return True
+    return False
diff --git a/obsidian_support/plugin.py b/obsidian_support/plugin.py
@@ -32,13 +32,13 @@ def __init__(self):
 
     def on_page_markdown(self, markdown, page, config, files):
         # apply conversions
-        markdown = self.admonition_callout_conversions.markdown_convert(markdown, page)
-        markdown = self.tabs_backquotes_conversion.markdown_convert(markdown, page)
-        markdown = self.tabs_tilde_block_conversion.markdown_convert(markdown, page)
         markdown = self.admonition_backquotes_conversion.markdown_convert(markdown, page)
         markdown = self.comment_conversion.markdown_convert(markdown, page)
         markdown = self.pdf_conversion.markdown_convert(markdown, page)
         markdown = self.image_web_link_conversions.markdown_convert(markdown, page)
         markdown = self.image_internal_link_conversion.markdown_convert(markdown, page)
         markdown = self.tags_conversion.markdown_convert(markdown, page)
+        markdown = self.admonition_callout_conversions.markdown_convert(markdown, page)
+        markdown = self.tabs_tilde_block_conversion.markdown_convert(markdown, page)
+        markdown = self.tabs_backquotes_conversion.markdown_convert(markdown, page)
         return markdown
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 from setuptools import setup, find_packages
 
-VERSION_NUMBER = '1.3.0'
+VERSION_NUMBER = '1.3.1'
 
 
 def read_file(fname):