From 606f70a0955150d8a5217c2558e30963653fd2ac Mon Sep 17 00:00:00 2001 From: Aster Date: Fri, 21 Jul 2017 15:16:04 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=AA=20Try=20the=20org=20parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- projects/panduck-org-mode/src/from_org/mod.rs | 155 +++++++++--------- projects/panduck-org-mode/src/lib.rs | 5 +- projects/panduck-org-mode/src/utils/mod.rs | 34 ++-- projects/panduck-org-mode/tests/links/mod.rs | 2 +- projects/panduck-org-mode/tests/lists/mod.rs | 2 +- projects/panduck-org-mode/tests/main.rs | 2 +- projects/panduck-org-mode/tests/tables/mod.rs | 95 ++++++++++- 7 files changed, 177 insertions(+), 118 deletions(-) diff --git a/projects/panduck-org-mode/src/from_org/mod.rs b/projects/panduck-org-mode/src/from_org/mod.rs index 4d24c95..952085d 100644 --- a/projects/panduck-org-mode/src/from_org/mod.rs +++ b/projects/panduck-org-mode/src/from_org/mod.rs @@ -1,23 +1,17 @@ -use crate::utils::{ root_items, GetTextRange, NoteBlock, NoteInline, NoteInlineList, NoteRoot, ReadState}; -use markdown::{ - mdast::{ - BlockQuote, Code, Delete, Emphasis, InlineCode, InlineMath, List, Math, Node, Paragraph, Root, Strong, Table, Text, - }, - to_mdast, Constructs, ParseOptions, -}; +use crate::utils::{NoteBlock, NoteBlockList, NoteInline, NoteInlineList, NoteRoot, ReadState}; +use orgize::{rowan::ast::AstNode, ParseConfig, SyntaxKind, SyntaxNode, SyntaxNodeChildren}; +use orgize::ast::HeadlineTitle; use wasi_notedown::exports::notedown::core::{ - syntax_tree::{ - CodeEnvironment, CodeHighlight, ListEnvironment, ListItem, MathContent, MathDisplay, MathEnvironment, NormalText, - NotedownRoot, ParagraphBlock, ParagraphItem, RootItem, StyleType, StyledText, TableCell, TableEnvironment, TableRow, - }, - types::{NotedownError, Object, TextRange}, + syntax_tree::{NotedownRoot, RootItem}, + types::{NotedownError, Object}, }; +use wasi_notedown::exports::notedown::core::syntax_tree::ParagraphItem; -mod blocks; -mod html; -mod inline; -mod list; -mod table; +// mod blocks; +// mod html; +// mod inline; +// mod list; +// mod table; pub struct MarkdownParser {} @@ -29,57 +23,13 @@ impl Default for MarkdownParser { impl MarkdownParser { pub fn load_str(&self, input: &str) -> Result> { - let config = ParseOptions { - constructs: Constructs { - attention: true, - autolink: true, - block_quote: true, - character_escape: true, - character_reference: true, - code_indented: true, - code_fenced: true, - code_text: true, - definition: true, - frontmatter: true, - gfm_autolink_literal: true, - gfm_footnote_definition: true, - gfm_label_start_footnote: true, - gfm_strikethrough: true, - gfm_table: true, - gfm_task_list_item: true, - hard_break_escape: true, - hard_break_trailing: true, - heading_atx: true, - heading_setext: true, - html_flow: true, - html_text: true, - label_start_image: true, - label_start_link: true, - label_end: true, - list_item: true, - math_flow: true, - math_text: true, - mdx_esm: true, - mdx_expression_flow: true, - mdx_expression_text: true, - mdx_jsx_flow: true, - mdx_jsx_text: true, - thematic_break: true, - }, - gfm_strikethrough_single_tilde: false, - math_text_single_dollar: false, - mdx_expression_parse: None, - mdx_esm_parse: None, - }; + let config = ParseConfig { ..Default::default() }; let mut state = ReadState::default(); - let root = match to_mdast(input, &config) { - Ok(to_mdast) => match to_mdast.note_down_root(&mut state) { - Ok(o) => o, - Err(e) => { - todo!() - } - }, - Err(e) => { + let org = config.parse(input); + let doc = org.document(); + let root = match doc.syntax().note_down_root(&mut state) { + Ok(o) => o, + Err(_) => { todo!() } }; @@ -87,35 +37,78 @@ impl MarkdownParser { } } -impl NoteRoot for Node { +impl<'i> NoteRoot for &'i SyntaxNode { fn note_down_root(self, state: &mut ReadState) -> Result { - match self { - Self::Root(node) => node.note_down_root(state), - _ => unreachable!(), + match self.kind() { + SyntaxKind::DOCUMENT => { + let blocks = self.children().note_down_block(state); + Ok(NotedownRoot { blocks, config: Object { map: vec![] }, path: None }) + } + _ => unreachable!("SyntaxKind::{:?} => {{}}", self.kind()), } } } -impl NoteRoot for Root { - fn note_down_root(self, state: &mut ReadState) -> Result { - let blocks = root_items(self.children, state)?; - Ok(NotedownRoot { blocks, config: Object { map: vec![] }, path: None }) +impl NoteBlockList for SyntaxNodeChildren { + fn note_down_block(self, state: &mut ReadState) -> Vec { + let mut out = Vec::with_capacity(self.size_hint().0); + for node in self { + match node.note_down_block(state) { + Ok(o) => out.push(o), + Err(e) => state.note_error(e), + } + } + out } } -impl NoteInlineList for Vec { +impl NoteInlineList for SyntaxNodeChildren { fn note_down_inline(self, state: &mut ReadState) -> Vec { - let mut items = Vec::with_capacity(self.len()); - for x in self { - match x.note_down_inline(state) { - Ok(o) => items.push(o), + let mut out = Vec::with_capacity(self.size_hint().0); + for node in self { + match node.note_down_inline(state) { + Ok(o) => out.push(o), Err(e) => state.note_error(e), } } - items + out } } +impl NoteBlock for SyntaxNode { + fn note_down_block(self, state: &mut ReadState) -> Result { + match self.kind() { + SyntaxKind::SECTION => { + Ok(RootItem::Placeholder) + } + SyntaxKind::HEADLINE => { + let inline = self.children().note_down_inline(state); + todo!() + } + _ => unreachable!("SyntaxKind::{:?} => {{}}", self.kind()), + } + } +} + + +impl NoteInline for SyntaxNode { + fn note_down_inline(self, state: &mut ReadState) -> Result { + match self.kind() { + SyntaxKind::HEADLINE => { + Ok(ParagraphItem::Placeholder) + } + SyntaxKind::HEADLINE_TITLE => { + Ok(ParagraphItem::Placeholder) + } + _ => unreachable!("SyntaxKind::{:?} => {{}}", self.kind()), + } + } +} + + + + + #[test] fn ready() { println!("it works!") diff --git a/projects/panduck-org-mode/src/lib.rs b/projects/panduck-org-mode/src/lib.rs index a78b002..d98c924 100644 --- a/projects/panduck-org-mode/src/lib.rs +++ b/projects/panduck-org-mode/src/lib.rs @@ -1,5 +1,4 @@ -mod from_cmd; -mod from_pandoc; +mod from_org; mod utils; -pub use from_cmd::MarkdownParser; \ No newline at end of file +pub use from_org::MarkdownParser; \ No newline at end of file diff --git a/projects/panduck-org-mode/src/utils/mod.rs b/projects/panduck-org-mode/src/utils/mod.rs index 49038cb..6aae3b6 100644 --- a/projects/panduck-org-mode/src/utils/mod.rs +++ b/projects/panduck-org-mode/src/utils/mod.rs @@ -1,4 +1,3 @@ -use markdown::{mdast::Node, unist::Position}; use wasi_notedown::exports::notedown::core::{ syntax_tree::{NotedownRoot, ParagraphItem, RootItem}, types::{NotedownError, TextRange}, @@ -31,6 +30,9 @@ pub trait NoteRoot { pub trait NoteBlock { fn note_down_block(self, state: &mut ReadState) -> Result; } +pub trait NoteBlockList { + fn note_down_block(self, state: &mut ReadState) -> Vec; +} pub trait NoteInline { fn note_down_inline(self, state: &mut ReadState) -> Result; @@ -43,25 +45,11 @@ pub trait GetTextRange { fn as_range(&self) -> TextRange; } -impl GetTextRange for Option { - fn as_range(&self) -> TextRange { - match self { - Some(s) => TextRange { head_offset: s.start.offset as u32, tail_offset: s.end.offset as u32 }, - None => TextRange { head_offset: 0, tail_offset: 0 }, - } - } -} - -pub fn root_items(children: Vec, state: &mut ReadState) -> Result, NotedownError> { - let mut blocks = Vec::with_capacity(children.len()); - for x in children { - match x.note_down_block(state) { - Ok(o) => blocks.push(o), - Err(e) => { - state.errors.push(e); - } - } - } - Ok(blocks) -} - +// impl GetTextRange for Option { +// fn as_range(&self) -> TextRange { +// match self { +// Some(s) => TextRange { head_offset: s.start.offset as u32, tail_offset: s.end.offset as u32 }, +// None => TextRange { head_offset: 0, tail_offset: 0 }, +// } +// } +// } diff --git a/projects/panduck-org-mode/tests/links/mod.rs b/projects/panduck-org-mode/tests/links/mod.rs index 9e67270..658111a 100644 --- a/projects/panduck-org-mode/tests/links/mod.rs +++ b/projects/panduck-org-mode/tests/links/mod.rs @@ -1,4 +1,4 @@ -use panduck_markdown::MarkdownParser; +use panduck_org_mode::MarkdownParser; const TEST_IMAGE: &'static str = r#" ![image-reference] diff --git a/projects/panduck-org-mode/tests/lists/mod.rs b/projects/panduck-org-mode/tests/lists/mod.rs index 494bc2e..f998cbb 100644 --- a/projects/panduck-org-mode/tests/lists/mod.rs +++ b/projects/panduck-org-mode/tests/lists/mod.rs @@ -1,4 +1,4 @@ -use panduck_markdown::MarkdownParser; +use panduck_org_mode::MarkdownParser; const TEST_LIST1: &'static str = r#" - a diff --git a/projects/panduck-org-mode/tests/main.rs b/projects/panduck-org-mode/tests/main.rs index 9ce461e..0255730 100644 --- a/projects/panduck-org-mode/tests/main.rs +++ b/projects/panduck-org-mode/tests/main.rs @@ -1,4 +1,4 @@ -use panduck_markdown::MarkdownParser; +use panduck_org_mode::MarkdownParser; mod links; mod lists; diff --git a/projects/panduck-org-mode/tests/tables/mod.rs b/projects/panduck-org-mode/tests/tables/mod.rs index 8e47c6b..2140f14 100644 --- a/projects/panduck-org-mode/tests/tables/mod.rs +++ b/projects/panduck-org-mode/tests/tables/mod.rs @@ -1,10 +1,92 @@ -use panduck_markdown::MarkdownParser; +use panduck_org_mode::MarkdownParser; const TEST_TABLE1: &'static str = r#" -| Name | Age | -| ---- | --- | -| John | 20 | -| Jane | 30 | +# git hash: f918bf4 +# build time: Mon, 01 Apr 2024 03:44:43 +0000 + +*Orgize*, a /pure/ =Rust= library for +parsing +Emacs+ _org-mode_ files. + +See also: +[[https://github.com/PoiScript/orgize][GitHub]] | +[[https://crates.io/crates/orgize][crates.io]] | +[[https://www.npmjs.com/package/orgize][NPM]] + +----- + +* Heading 1 *bold* +** Heading 2 =verbatim= +*** Heading 3 ~code~ +**** Heading 4 /italic/ +***** Heading 5 +strike+ +****** Heading 6 _underline_ + +This's section + +#+begin_quote +This is a quote +#+end_quote + +#+begin_example +This is an example block +#+end_example + +----- +List + +1. First item +2. Second item +3. Third item + * Indented item + * Indented item +4. Fourth item + +----- +Description list + +- Rust _programming_ language:: A language empowering everyone + to build reliable and efficient software. + +----- +Table + +|Syntax |Description| +|-----------|-----------| +|Header |Title | +|Paragraph |Text | + +----- +Image + +[[https://www.rust-lang.org/static/images/rust-logo-blk.svg]] + +----- +LaTeX + +Render with \(\KaTeX\): $x+y$ + +$$ + f(\relax{x}) = \int_{-\infty}^\infty + \hat{f}(\xi)\,e^{2 \pi i \xi x} + \,d\xi +$$ + +\begin{align} + a&=b+c \\ + d+e&=f +\end{align} + +----- +Entity + +\alpha\_ \rightarrow{}\_ \beta + +----- +Subscript & superscript & line break + +E= mc^2 \\ +Fe_{_3_}O_4 + "#; const TEST_TABLE2: &'static str = r#"+++ @@ -22,7 +104,4 @@ yaml: 1 pub fn test_table() { let reader = MarkdownParser::default(); let _ = reader.load_str(TEST_TABLE1).unwrap(); - let _ = reader.load_str(TEST_TABLE2).unwrap(); - let _ = reader.load_str(TEST_TABLE3).unwrap(); - }