From eb3367c7a55dd64d1ec228b062792160030dd198 Mon Sep 17 00:00:00 2001 From: MrShwhale Date: Wed, 20 Mar 2024 00:41:40 -0700 Subject: [PATCH 1/9] Add documentation to the info and includes modules. --- src/includes/include_ref.rs | 6 ++--- src/includes/includer/debug.rs | 7 ++++++ src/includes/includer/mod.rs | 8 +++++- src/includes/includer/null.rs | 1 + src/includes/mod.rs | 8 +++++- src/includes/parse.rs | 16 ++++++++++++ src/info.rs | 8 ++++-- src/lib.rs | 45 ++++++++++++++++++++++++++++++++++ 8 files changed, 92 insertions(+), 7 deletions(-) diff --git a/src/includes/include_ref.rs b/src/includes/include_ref.rs index 110794b8a..ca4499a72 100644 --- a/src/includes/include_ref.rs +++ b/src/includes/include_ref.rs @@ -21,10 +21,10 @@ use crate::data::PageRef; use crate::tree::VariableMap; -/// Represents an include block. +/// Represents an include block before it has been replaced with the fetched page. /// -/// It contains the page being included, as well as the arguments -/// to be passed to it when doing the substitution. +/// It contains the page being included, as well as the variables passed to it in the include +/// block. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] pub struct IncludeRef<'t> { diff --git a/src/includes/includer/debug.rs b/src/includes/includer/debug.rs index c51987327..bfe5e7198 100644 --- a/src/includes/includer/debug.rs +++ b/src/includes/includer/debug.rs @@ -20,9 +20,16 @@ use super::prelude::*; use crate::tree::VariableMap; +// Imported so that tests work, otherwise not used +#[allow(unused_imports)] +use std::collections::HashMap; use std::convert::Infallible; use std::fmt::{self, Display}; +/// An [`Includer`] that replaces included references with the page content followed by the +/// include variables and their values. +/// +/// Useful for testing includes. 
#[derive(Debug)] pub struct DebugIncluder; diff --git a/src/includes/includer/mod.rs b/src/includes/includer/mod.rs index 20e4117e2..03355415a 100644 --- a/src/includes/includer/mod.rs +++ b/src/includes/includer/mod.rs @@ -18,6 +18,9 @@ * along with this program. If not, see . */ +//! This module contains tools which format pages after they have been referenced in an include +//! block. + mod debug; mod null; @@ -25,7 +28,6 @@ mod prelude { pub use crate::data::PageRef; pub use crate::includes::{FetchedPage, IncludeRef, Includer}; pub use std::borrow::Cow; - pub use std::collections::HashMap; } use crate::includes::{IncludeRef, PageRef}; @@ -34,6 +36,7 @@ use std::borrow::Cow; pub use self::debug::DebugIncluder; pub use self::null::NullIncluder; +/// A type used by [`Includer`] which represents a page that is ready to be included. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] pub struct FetchedPage<'t> { @@ -41,14 +44,17 @@ pub struct FetchedPage<'t> { pub content: Option>, } +/// A trait that handles the formatting of included pages. pub trait Includer<'t> { type Error; + /// Returns a list of the pages included. fn include_pages( &mut self, includes: &[IncludeRef<'t>], ) -> Result>, Self::Error>; + /// Handles the inclusion of a page not found. fn no_such_include( &mut self, page_ref: &PageRef<'t>, diff --git a/src/includes/includer/null.rs b/src/includes/includer/null.rs index 709ec7e73..eb08685a7 100644 --- a/src/includes/includer/null.rs +++ b/src/includes/includer/null.rs @@ -21,6 +21,7 @@ use super::prelude::*; use std::convert::Infallible; +/// An [`Includer`] that replaces include blocks with nothing. #[derive(Debug)] pub struct NullIncluder; diff --git a/src/includes/mod.rs b/src/includes/mod.rs index aeb13c0cf..d677106d6 100644 --- a/src/includes/mod.rs +++ b/src/includes/mod.rs @@ -20,10 +20,11 @@ //! This module implements "messy includes", or Wikidot native includes. //! -//! 
It is an annoying but necessary hack that parses the psueodblock +//! It is an annoying but necessary hack that parses the pseudoblock //! `[[include-messy]]` and directly replaces that part with the //! foreign page's wikitext. +#[warn(missing_docs)] #[cfg(test)] mod test; @@ -52,6 +53,8 @@ static INCLUDE_REGEX: Lazy = Lazy::new(|| { static VARIABLE_REGEX: Lazy = Lazy::new(|| Regex::new(r"\{\$(?P[a-zA-Z0-9_\-]+)\}").unwrap()); +/// Replaces the include blocks in a string with the content of the pages referenced by those +/// blocks. pub fn include<'t, I, E, F>( input: &'t str, settings: &WikitextSettings, @@ -157,6 +160,9 @@ where Ok((output, pages)) } +/// Replaces all specified variables in the content to be included. +/// +/// Read for more details. fn replace_variables(content: &mut String, variables: &VariableMap) { let mut matches = Vec::new(); diff --git a/src/includes/parse.rs b/src/includes/parse.rs index 7fbe50b90..f9f90fbc9 100644 --- a/src/includes/parse.rs +++ b/src/includes/parse.rs @@ -18,6 +18,8 @@ * along with this program. If not, see . */ +//! This module provides functions to parse strings into [`IncludeRef`]s + use super::IncludeRef; use crate::data::{PageRef, PageRefParseError}; use crate::settings::WikitextSettings; @@ -30,6 +32,19 @@ use std::collections::HashMap; #[grammar = "includes/grammar.pest"] struct IncludeParser; +/// Parses a single include block in the text +/// +/// # Arguments +/// The "start" argument is the index at which the include block starts. +/// +/// It does not necessarily relate to the index of the include within the text str. +/// +/// # Return values +/// Returns a tuple of an [`IncludeRef`] that represents the included text and a usize that +/// represents the end index of the include block. +/// +/// This usize is not necessarily the end index within the text string passed in, and is +/// solely based on the length of the include block and the start value. 
pub fn parse_include_block<'t>( text: &'t str, start: usize, @@ -63,6 +78,7 @@ pub fn parse_include_block<'t>( } } +/// Creates an [`IncludeRef`] out of pest [`Pairs`]. fn process_pairs(mut pairs: Pairs) -> Result { let page_raw = pairs.next().ok_or(IncludeParseError)?.as_str(); let page_ref = PageRef::parse(page_raw)?; diff --git a/src/info.rs b/src/info.rs index 37bf25d9e..bfbb17711 100644 --- a/src/info.rs +++ b/src/info.rs @@ -18,6 +18,8 @@ * along with this program. If not, see . */ +//! This module has build and meta information about the library. + #[allow(unused)] mod build { include!(concat!(env!("OUT_DIR"), "/built.rs")); @@ -40,7 +42,9 @@ static VERSION_INFO: Lazy = Lazy::new(|| { version }); +/// The package name and version info. pub static VERSION: Lazy = Lazy::new(|| format!("{PKG_NAME} {}", *VERSION_INFO)); +/// The full version info, including build information. pub static FULL_VERSION: Lazy = Lazy::new(|| { let mut version = format!("{}\n\nCompiled:\n", *VERSION_INFO); @@ -51,10 +55,10 @@ pub static FULL_VERSION: Lazy = Lazy::new(|| { version }); -pub static VERSION_WITH_NAME: Lazy = - Lazy::new(|| format!("{PKG_NAME} {}", *VERSION)); +/// The package name and full version info, including build information. pub static FULL_VERSION_WITH_NAME: Lazy = Lazy::new(|| format!("{PKG_NAME} {}", *FULL_VERSION)); +/// The first 8 characters of the commit hash for this version. pub static GIT_COMMIT_HASH_SHORT: Lazy> = Lazy::new(|| GIT_COMMIT_HASH.map(|s| &s[..8])); diff --git a/src/lib.rs b/src/lib.rs index d6824061f..3b94619b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,6 +53,50 @@ //! which renders LaTeX blocks using MathML. It is enabled //! by default. //! +//! # Examples +// TODO do something with the links in these comments +//! ``` +//!// Get an `Includer`. +//!// +//!// See trait documentation for what this requires, but +//!// essentially it is some abstract handle that gets the +//!// contents of a page to be included. 
+//!// +//!// Two sample includers you could try are `NullIncluder` +//!// and `DebugIncluder`. +//!let includer = MyIncluderImpl::new(); +//! +//!// Get our source text +//!let mut input = "**some** test <>"; +//! +//!// Substitute page inclusions +//!let (mut text, included_pages) = ftml::include(input, includer, &settings); +//! +//!// Perform preprocess substitutions +//!ftml::preprocess(&log, &mut text); +//! +//!// Generate token from input text +//!let tokens = ftml::tokenize(&text); +//! +//!// Parse the token list to produce an AST. +//!// +//!// Note that this produces a `ParseResult`, which records the +//!// parsing warnings in addition to the final result. +//!let result = ftml::parse(&tokens, &page_info, &settings); +//! +//!// Here we extract the tree separately from the warning list. +//!// +//!// Now we have the final AST, as well as all the issues that +//!// occurred during the parsing process. +//!let (tree, warnings) = result.into(); +//!// Finally, we render with our renderer. Generally this is `HtmlRender`, +//!// but you could have a custom implementation here too. +//!// +//!// You must provide a `PageInfo` struct, which describes the page being rendered. +//!// You must also provide a handle to provide various remote sources, such as +//!// module content, but this is not stabilized yet. +//!let html_output = HtmlRender.render(&tree, &page_info, &settings); +//! ```` //! # Targets //! The library supports being compiled into WebAssembly. //! (target `wasm32-unknown-unknown`, see [`wasm-pack`] for more information) @@ -133,6 +177,7 @@ pub use self::preproc::preprocess; pub use self::tokenizer::{tokenize, Tokenization}; pub use self::utf16::Utf16IndexMap; +/// This module collects commonly used traits from this crate. 
pub mod prelude { pub use super::data::{PageInfo, ScoreValue}; pub use super::includes::{include, Includer}; From 8be858ac0b1554bc19e4165488a7b78bfbdb167a Mon Sep 17 00:00:00 2001 From: MrShwhale Date: Wed, 20 Mar 2024 12:14:49 -0700 Subject: [PATCH 2/9] Remove example from library doc. --- src/lib.rs | 45 +-------------------------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3b94619b9..658569946 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,7 @@ * along with this program. If not, see . */ +// TODO do something with the links in the example comments //! A library to parse Wikidot text and produce an abstract syntax tree (AST). //! //! This library aims to be a replacement of Wikidot's Text_Wiki @@ -53,50 +54,6 @@ //! which renders LaTeX blocks using MathML. It is enabled //! by default. //! -//! # Examples -// TODO do something with the links in these comments -//! ``` -//!// Get an `Includer`. -//!// -//!// See trait documentation for what this requires, but -//!// essentially it is some abstract handle that gets the -//!// contents of a page to be included. -//!// -//!// Two sample includers you could try are `NullIncluder` -//!// and `DebugIncluder`. -//!let includer = MyIncluderImpl::new(); -//! -//!// Get our source text -//!let mut input = "**some** test <>"; -//! -//!// Substitute page inclusions -//!let (mut text, included_pages) = ftml::include(input, includer, &settings); -//! -//!// Perform preprocess substitutions -//!ftml::preprocess(&log, &mut text); -//! -//!// Generate token from input text -//!let tokens = ftml::tokenize(&text); -//! -//!// Parse the token list to produce an AST. -//!// -//!// Note that this produces a `ParseResult`, which records the -//!// parsing warnings in addition to the final result. -//!let result = ftml::parse(&tokens, &page_info, &settings); -//! -//!// Here we extract the tree separately from the warning list. 
-//!// -//!// Now we have the final AST, as well as all the issues that -//!// occurred during the parsing process. -//!let (tree, warnings) = result.into(); -//!// Finally, we render with our renderer. Generally this is `HtmlRender`, -//!// but you could have a custom implementation here too. -//!// -//!// You must provide a `PageInfo` struct, which describes the page being rendered. -//!// You must also provide a handle to provide various remote sources, such as -//!// module content, but this is not stabilized yet. -//!let html_output = HtmlRender.render(&tree, &page_info, &settings); -//! ```` //! # Targets //! The library supports being compiled into WebAssembly. //! (target `wasm32-unknown-unknown`, see [`wasm-pack`] for more information) From 69da2a00f85b784981e54e13fb3e8d496659370f Mon Sep 17 00:00:00 2001 From: MrShwhale Date: Wed, 20 Mar 2024 12:27:59 -0700 Subject: [PATCH 3/9] Remove lib.rs example. --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 658569946..bf31fb757 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,7 +18,6 @@ * along with this program. If not, see . */ -// TODO do something with the links in the example comments //! A library to parse Wikidot text and produce an abstract syntax tree (AST). //! //! This library aims to be a replacement of Wikidot's Text_Wiki From f57309a7063a5ecceae8860503a2ea483c189bdf Mon Sep 17 00:00:00 2001 From: MrShwhale Date: Wed, 20 Mar 2024 12:14:49 -0700 Subject: [PATCH 4/9] Remove example from lib.rs doc. --- src/lib.rs | 44 -------------------------------------------- 1 file changed, 44 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3b94619b9..bf31fb757 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,50 +53,6 @@ //! which renders LaTeX blocks using MathML. It is enabled //! by default. //! -//! # Examples -// TODO do something with the links in these comments -//! ``` -//!// Get an `Includer`. 
-//!// -//!// See trait documentation for what this requires, but -//!// essentially it is some abstract handle that gets the -//!// contents of a page to be included. -//!// -//!// Two sample includers you could try are `NullIncluder` -//!// and `DebugIncluder`. -//!let includer = MyIncluderImpl::new(); -//! -//!// Get our source text -//!let mut input = "**some** test <>"; -//! -//!// Substitute page inclusions -//!let (mut text, included_pages) = ftml::include(input, includer, &settings); -//! -//!// Perform preprocess substitutions -//!ftml::preprocess(&log, &mut text); -//! -//!// Generate token from input text -//!let tokens = ftml::tokenize(&text); -//! -//!// Parse the token list to produce an AST. -//!// -//!// Note that this produces a `ParseResult`, which records the -//!// parsing warnings in addition to the final result. -//!let result = ftml::parse(&tokens, &page_info, &settings); -//! -//!// Here we extract the tree separately from the warning list. -//!// -//!// Now we have the final AST, as well as all the issues that -//!// occurred during the parsing process. -//!let (tree, warnings) = result.into(); -//!// Finally, we render with our renderer. Generally this is `HtmlRender`, -//!// but you could have a custom implementation here too. -//!// -//!// You must provide a `PageInfo` struct, which describes the page being rendered. -//!// You must also provide a handle to provide various remote sources, such as -//!// module content, but this is not stabilized yet. -//!let html_output = HtmlRender.render(&tree, &page_info, &settings); -//! ```` //! # Targets //! The library supports being compiled into WebAssembly. //! 
(target `wasm32-unknown-unknown`, see [`wasm-pack`] for more information) From b11091853040bf9f815dd3694ed8f066872d1071 Mon Sep 17 00:00:00 2001 From: William Patmore <93882520+MrShwhale@users.noreply.github.com> Date: Wed, 20 Mar 2024 19:39:13 -0700 Subject: [PATCH 5/9] Update src/includes/includer/debug.rs Include hashmap only for tests Co-authored-by: emmiegit --- src/includes/includer/debug.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/includes/includer/debug.rs b/src/includes/includer/debug.rs index bfe5e7198..c5b21103d 100644 --- a/src/includes/includer/debug.rs +++ b/src/includes/includer/debug.rs @@ -20,8 +20,7 @@ use super::prelude::*; use crate::tree::VariableMap; -// Imported so that tests work, otherwise not used -#[allow(unused_imports)] +#[cfg(test)] use std::collections::HashMap; use std::convert::Infallible; use std::fmt::{self, Display}; From a0f91c925c36b818c05dcad1f11fd37f6f9a0b81 Mon Sep 17 00:00:00 2001 From: William Patmore <93882520+MrShwhale@users.noreply.github.com> Date: Wed, 20 Mar 2024 19:39:24 -0700 Subject: [PATCH 6/9] Update src/includes/parse.rs Co-authored-by: emmiegit --- src/includes/parse.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/includes/parse.rs b/src/includes/parse.rs index f9f90fbc9..d37257cf4 100644 --- a/src/includes/parse.rs +++ b/src/includes/parse.rs @@ -32,7 +32,7 @@ use std::collections::HashMap; #[grammar = "includes/grammar.pest"] struct IncludeParser; -/// Parses a single include block in the text +/// Parses a single include block in the text. /// /// # Arguments /// The "start" argument is the index at which the include block starts. 
From 453d648f24f2c065a04c5927cbe178103e376f14 Mon Sep 17 00:00:00 2001 From: William Patmore <93882520+MrShwhale@users.noreply.github.com> Date: Wed, 20 Mar 2024 19:47:40 -0700 Subject: [PATCH 7/9] Update src/includes/parse.rs Co-authored-by: emmiegit --- src/includes/parse.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/includes/parse.rs b/src/includes/parse.rs index d37257cf4..c8806bb37 100644 --- a/src/includes/parse.rs +++ b/src/includes/parse.rs @@ -36,7 +36,6 @@ struct IncludeParser; /// /// # Arguments /// The "start" argument is the index at which the include block starts. -/// /// It does not necessarily relate to the index of the include within the text str. /// /// # Return values From d9f082bf87f10adc692b55ebb5ec70e3d848aa44 Mon Sep 17 00:00:00 2001 From: MrShwhale Date: Thu, 21 Mar 2024 18:30:09 -0700 Subject: [PATCH 8/9] Make start value explicitly used in parse_include_block. --- src/includes/mod.rs | 2 +- src/includes/parse.rs | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/includes/mod.rs b/src/includes/mod.rs index d677106d6..fb49359cb 100644 --- a/src/includes/mod.rs +++ b/src/includes/mod.rs @@ -88,7 +88,7 @@ where mtch.as_str(), ); - match parse_include_block(&input[start..], start, settings) { + match parse_include_block(&input, start, settings) { Ok((include, end)) => { ranges.push(start..end); includes.push(include); diff --git a/src/includes/parse.rs b/src/includes/parse.rs index c8806bb37..9af43f1aa 100644 --- a/src/includes/parse.rs +++ b/src/includes/parse.rs @@ -40,10 +40,8 @@ struct IncludeParser; /// /// # Return values /// Returns a tuple of an [`IncludeRef`] that represents the included text and a usize that -/// represents the end index of the include block. -/// -/// This usize is not necessarily the end index within the text string passed in, and is -/// solely based on the length of the include block and the start value. 
+/// represents the end index of the include block, such that start..end covers the full include +/// block (before the include goes through). pub fn parse_include_block<'t>( text: &'t str, start: usize, @@ -55,7 +53,7 @@ pub fn parse_include_block<'t>( Rule::include_normal }; - match IncludeParser::parse(rule, text) { + match IncludeParser::parse(rule, &text[start..]) { Ok(mut pairs) => { // Extract inner pairs // These actually make up the include block's tokens From 8fcb858b29dd436c3440963b65192cfe4065cff5 Mon Sep 17 00:00:00 2001 From: MrShwhale Date: Thu, 21 Mar 2024 18:34:14 -0700 Subject: [PATCH 9/9] Remove useless borrow. --- src/includes/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/includes/mod.rs b/src/includes/mod.rs index fb49359cb..9c09ea903 100644 --- a/src/includes/mod.rs +++ b/src/includes/mod.rs @@ -88,7 +88,7 @@ where mtch.as_str(), ); - match parse_include_block(&input, start, settings) { + match parse_include_block(input, start, settings) { Ok((include, end)) => { ranges.push(start..end); includes.push(include);