Skip to content

Commit

Permalink
Merge pull request #38 from ndy2/refactor-get-exclude-indinces
Browse files Browse the repository at this point in the history
support more precise escape for code blocks
  • Loading branch information
ndy2 authored Mar 8, 2024
2 parents 695afda + f4dc9b3 commit daa5189
Show file tree
Hide file tree
Showing 9 changed files with 328 additions and 156 deletions.
24 changes: 15 additions & 9 deletions docs/features/admonition/backquotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,38 @@ comments: true
### Admonition Backquotes

=== "obsidian markdown"
~~~

~~~tabs
---tab obsidian markdown
````
```ad-tip
title: This is a tip
This is the content of the admonition tip.
```
````
---tab obsidian rendered
![[images/backquotes_1.png]]
~~~

=== "obsidian rendered"
![[images/backquotes_1.png]]

### mkdocs-material admonition

=== "mkdocs-material markdown"


### mkdocs-material admonition

~~~tabs
---tab mkdocs-material markdown
```
!!!tip "This is a tip"
This is the content of the admonition tip.
```
=== "mkdocs-material rendered"

---tab mkdocs-material rendered
```ad-tip
title: This is a tip
This is the content of the admonition tip.
```
```
~~~
28 changes: 13 additions & 15 deletions docs/features/comment/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,41 +12,39 @@ comments: true
## Usage

### obsidian comment

=== "obsidian markdown"

```
```tabs
---tab obsidian markdown
~~~
This is an %%inline%% comment.
%%
This is a block comment.
Block comments can span multiple lines.
%%
```

=== "obsidian reading view"
~~~
---tab obsidian reading view
![[images/comment_1.png]]
```

### mkdocs-material comment

=== "mkdocs-material markdown"
### mkdocs-material comment

```
```tabs
---tab mkdocs-material markdown
~~~
This is an <!--inline--> comment.
<!--
This is a block comment.
Block comments can span multiple lines.
-->
```

=== "mkdocs-material rendered"
~~~
---tab mkdocs-material rendered
This is an %%inline%% comment.

%%
This is a block comment.
Block comments can span multiple lines.
%%

```
4 changes: 4 additions & 0 deletions docs/features/tabs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ This tab contains callout

### mkdocs-material content tabs

**mkdocs markdown**

~~~~
=== "First *tab*"
Expand Down Expand Up @@ -92,6 +94,8 @@ This tab contains callout
~~~~


**rendered**

```tabs
---tab First *tab*
This is a *sample* **tab** with some markdown :
Expand Down
84 changes: 7 additions & 77 deletions obsidian_support/conversion/abstract_conversion.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import re
from abc import *
from enum import Enum
from typing import List, Tuple
from typing import List

from mkdocs.structure.pages import Page
from overrides import final

from obsidian_support.conversion.util import is_overlapped, get_exclude_indices

"""
An abstract class that implies a conversion from `obsidian syntax` to `mkdocs-material syntax`
Expand All @@ -20,32 +20,6 @@
SyntaxGroup = List[str]


class MarkdownPatterns(Enum):
# Exclude Patterns
""" regex that matches markdown `tilde code block` (triple tilde syntax)"""
TILDE_CODE_BLOCK = r"([A-Za-z \t]*)~~~([-A-Za-z]*)?\n([\s\S]*?)~~~([A-Za-z \t]*)*"

"""regex that matches markdown `backtick code block` (triple backtick syntax)"""
BACKTICK_CODE_BLOCK = r"([A-Za-z \t]*)```([-A-Za-z]*)?\n([\s\S]*?)```([A-Za-z \t]*)*"

"""regex that matches markdown code (single backtick syntax)"""
BACKTICK_CODE = r"`[\S\s]*?`"

# Do Not Exclude Patterns
"""regex that matches markdown `admonition backquotes` (triple tilde syntax)"""
ADMONITION_BACKQUOTES_CODE_BLOCK = r"([A-Za-z \t]*)```ad-([-A-Za-z]*)?\n([\s\S]*?)```([A-Za-z \t]*)*"

"""regex that matches markdown `tabs backquotes code block` (triple tilde syntax)"""
TABS_BACKQUOTES_CODE_BLOCK = r"([A-Za-z \t]*)```tabs([-A-Za-z]*)?\n([\s\S]*?)```([A-Za-z \t]*)*"

"""regex that matches markdown `tabs tilde code block` (triple tilde syntax)"""
TABS_TILDE_CODE_BLOCK = r"([A-Za-z \t]*)~~~tabs([-A-Za-z]*)?\n([\s\S]*?)~~~([A-Za-z \t]*)*"

@property
def regex(self):
return self.value


class AbstractConversion(metaclass=ABCMeta):

@property
Expand All @@ -69,16 +43,15 @@ def convert(self, syntax_groups: SyntaxGroup, page: Page) -> str:
def markdown_convert(self, markdown: str, page: Page) -> str:
converted_markdown = ""
index = 0
excluded_indices = self._get_excluded_indices(markdown)
excluded_indices = self._filter_do_not_exclude_indices(excluded_indices, markdown)
excluded_indices = get_exclude_indices(markdown)

for obsidian_syntax in self.obsidian_regex_pattern.finditer(markdown):
## found range of markdown where the obsidian_regex matches
# find range of markdown where the obsidian_regex matches
start = obsidian_syntax.start()
end = obsidian_syntax.end() - 1

## continue if match is in excluded range
if self._is_overlapped(start, end, excluded_indices):
# continue if match is in excluded range
if is_overlapped(start, end, excluded_indices):
continue

syntax_groups = list(map(lambda group: obsidian_syntax.group(group), self.obsidian_regex_groups))
Expand All @@ -90,46 +63,3 @@ def markdown_convert(self, markdown: str, page: Page) -> str:

converted_markdown += markdown[index:len(markdown)]
return converted_markdown

@staticmethod
def _get_excluded_indices(markdown: str) -> List[tuple]:
tilde_code_block_indices = []
for code in re.finditer(MarkdownPatterns.TILDE_CODE_BLOCK.regex, markdown):
tilde_code_block_indices.append((code.start(), code.end() - 1))

backtick_code_block_indices = []
for code in re.finditer(MarkdownPatterns.BACKTICK_CODE_BLOCK.regex, markdown):
if not AbstractConversion._is_overlapped(code.start(), code.end() - 1, tilde_code_block_indices):
backtick_code_block_indices.append((code.start(), code.end() - 1))

backtick_code_indices = []
for code in re.finditer(MarkdownPatterns.BACKTICK_CODE.regex, markdown):
if not AbstractConversion._is_overlapped(code.start(), code.end() - 1, tilde_code_block_indices) and \
not AbstractConversion._is_overlapped(code.start(), code.end() - 1, backtick_code_block_indices):
backtick_code_indices.append((code.start(), code.end() - 1))

return tilde_code_block_indices + backtick_code_block_indices + backtick_code_indices

@staticmethod
def _filter_do_not_exclude_indices(exclude_indices: List[Tuple], markdown) -> List[Tuple]:
admonition_backquotes_code_block_indices = []
for code in re.finditer(MarkdownPatterns.ADMONITION_BACKQUOTES_CODE_BLOCK.regex, markdown):
admonition_backquotes_code_block_indices.append((code.start(), code.end() - 1))

tabs_backquotes_code_block_indices = []
for code in re.finditer(MarkdownPatterns.TABS_BACKQUOTES_CODE_BLOCK.regex, markdown):
tabs_backquotes_code_block_indices.append((code.start(), code.end() - 1))

tabs_tilde_code_block_indices = []
for code in re.finditer(MarkdownPatterns.TABS_TILDE_CODE_BLOCK.regex, markdown):
tabs_tilde_code_block_indices.append((code.start(), code.end() - 1))

do_not_exclude_indices = admonition_backquotes_code_block_indices + tabs_backquotes_code_block_indices + tabs_tilde_code_block_indices
return list(filter(lambda indices: indices not in do_not_exclude_indices, exclude_indices))

@staticmethod
def _is_overlapped(start: int, end: int, exclude_indices_pairs: List[tuple]) -> bool:
for exclude_indices_pair in exclude_indices_pairs:
if exclude_indices_pair[0] <= start and end <= exclude_indices_pair[1]:
return True
return False
79 changes: 79 additions & 0 deletions obsidian_support/conversion/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import dataclasses
import re
from typing import Tuple, List

# Matches a fence line of a fenced code block: a run of three or more backtick/tilde
# characters at the very start of a line (re.MULTILINE makes `^` match at every line
# start), captured as `code_block`, immediately followed by an optional run of ASCII
# letters and hyphens captured as `language` (e.g. "ad-tip", "tabs").
# NOTE(review): the character class also accepts mixed runs such as "`~`" - confirm
# this is acceptable for the markdown this plugin is expected to process.
CODE_BLOCK_PATTERN = re.compile(r'(?P<code_block>^[`~]{3,})(?P<language>[a-zA-Z\-]*)?', re.MULTILINE)
# Matches inline code: a pair of single backticks on one line enclosing at least one
# character that is neither a newline nor a backtick.
BACKQUOTES_CODE_PATTERN = re.compile(r"`[^\n`]+`")


@dataclasses.dataclass
class CodeBlockSyntax:
    """One fence line (an opening or closing code-block marker) found in a markdown text."""

    # offset in the markdown at which the fence match begins
    start: int
    # offset just past the fence match (fence characters plus the language tag, if any)
    end: int
    # first character of the fence; used to tell backtick fences from tilde fences
    code_block_type: str
    # language tag captured directly after the fence characters, e.g. "ad-tip" or "tabs"
    language: str


def get_exclude_indices(markdown: str) -> List[Tuple[int, int]]:
    """Collect the character ranges of ``markdown`` that conversions must leave untouched.

    The ranges are gathered in three steps:

    1. Every fence line matched by ``CODE_BLOCK_PATTERN`` is grouped by the number
       of fence characters it consists of.
    2. For each fence length, longest first, fences that use the same fence
       character are paired into an opening and a closing fence. The paired block
       is recorded unless the opening fence carries an ``ad-*`` or ``tabs``
       language tag. While such an ``ad-*``/``tabs`` block is open, one block
       written with the other fence character is tracked inside it and recorded
       under the same rule.
    3. Inline code matched by ``BACKQUOTES_CODE_PATTERN`` is recorded when it is
       not already inside a recorded range.

    :param markdown: the markdown text to scan
    :return: ``(start, end)`` offset pairs, where ``start`` is the offset of the
        first character of the range and ``end`` is the regex match end of its
        last element (one past the last matched character)
    """
    exclude_indices: List[Tuple[int, int]] = []

    # step 1
    # get all lines that start with three or more of ` or ~
    # and group them by the number of fence characters
    fences_by_size = {}
    for fence_match in CODE_BLOCK_PATTERN.finditer(markdown):
        fence_chars = fence_match.group("code_block")
        fences_by_size.setdefault(len(fence_chars), []).append(
            CodeBlockSyntax(
                start=fence_match.start(),
                end=fence_match.end(),
                code_block_type=fence_chars[0],
                # defensive: fall back to "" should the optional group report None
                language=fence_match.group("language") or "",
            )
        )

    # step 2
    # walk the fences from the longest fence size to the shortest, so that longer
    # fences claim their range before the shorter fences written inside them
    for fence_size in sorted(fences_by_size, reverse=True):
        current_syntax = None
        nested_syntax = None

        # drop fences that already sit inside an excluded range; this is a snapshot
        # taken before the loop below adds further ranges for this fence size
        candidates = [it for it in fences_by_size[fence_size]
                      if not is_overlapped(it.start, it.end, exclude_indices)]
        for fence in candidates:
            if current_syntax is None:
                # no block is open: this fence opens one
                current_syntax = fence
            elif current_syntax.code_block_type == fence.code_block_type and \
                    not is_overlapped(current_syntax.start, fence.end, exclude_indices):
                # same fence character: this fence closes the open block;
                # do not exclude it if it is an ad or tabs block
                if not _is_convertible_block(current_syntax.language):
                    exclude_indices.append((current_syntax.start, fence.end))
                current_syntax = None
            elif _is_convertible_block(current_syntax.language) and \
                    current_syntax.code_block_type != fence.code_block_type and \
                    nested_syntax is None:
                # other fence character inside an open ad/tabs block: a nested block opens
                nested_syntax = fence
            elif nested_syntax is not None and \
                    fence.code_block_type == nested_syntax.code_block_type:
                # this fence closes the nested block
                if not _is_convertible_block(nested_syntax.language):
                    exclude_indices.append((nested_syntax.start, fence.end))
                nested_syntax = None

    # step 3
    # exclude inline code (`...`) that is not already covered by an excluded range
    for code_match in BACKQUOTES_CODE_PATTERN.finditer(markdown):
        code_start, code_end = code_match.span()
        if not is_overlapped(code_start, code_end, exclude_indices):
            exclude_indices.append((code_start, code_end))

    return exclude_indices


def _is_convertible_block(language: str) -> bool:
    """Return True for ``ad-*`` and ``tabs`` language tags, whose blocks are never excluded."""
    return language.startswith("ad-") or language == "tabs"


def is_overlapped(start: int, end: int, exclude_indices_pairs: List[Tuple[int, int]]) -> bool:
    """Tell whether the range ``start``..``end`` lies entirely inside one of the given ranges.

    Despite its name this is a containment check: a range that only partially
    intersects an excluded range is reported as not overlapped.

    :param start: first offset of the range to test
    :param end: last offset of the range to test
    :param exclude_indices_pairs: ranges to test against; element 0 of each pair is
        its lower bound and element 1 its upper bound, both compared inclusively
    :return: True if some pair satisfies ``pair[0] <= start`` and ``end <= pair[1]``
    """
    return any(pair[0] <= start and end <= pair[1] for pair in exclude_indices_pairs)
6 changes: 3 additions & 3 deletions obsidian_support/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ def __init__(self):

def on_page_markdown(self, markdown, page, config, files):
# apply conversions
markdown = self.admonition_callout_conversions.markdown_convert(markdown, page)
markdown = self.tabs_backquotes_conversion.markdown_convert(markdown, page)
markdown = self.tabs_tilde_block_conversion.markdown_convert(markdown, page)
markdown = self.admonition_backquotes_conversion.markdown_convert(markdown, page)
markdown = self.comment_conversion.markdown_convert(markdown, page)
markdown = self.pdf_conversion.markdown_convert(markdown, page)
markdown = self.image_web_link_conversions.markdown_convert(markdown, page)
markdown = self.image_internal_link_conversion.markdown_convert(markdown, page)
markdown = self.tags_conversion.markdown_convert(markdown, page)
markdown = self.admonition_callout_conversions.markdown_convert(markdown, page)
markdown = self.tabs_tilde_block_conversion.markdown_convert(markdown, page)
markdown = self.tabs_backquotes_conversion.markdown_convert(markdown, page)
return markdown
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from setuptools import setup, find_packages

VERSION_NUMBER = '1.3.0'
VERSION_NUMBER = '1.3.1'


def read_file(fname):
Expand Down
Loading

0 comments on commit daa5189

Please sign in to comment.