diff --git a/src/data/page_ref.rs b/src/data/page_ref.rs
index 29ad84241..5d7cef3ff 100644
--- a/src/data/page_ref.rs
+++ b/src/data/page_ref.rs
@@ -40,6 +40,7 @@ pub struct PageRef<'t> {
 }
 
 impl<'t> PageRef<'t> {
+    /// Creates a [`PageRef`] with the given page and site.
     #[inline]
     pub fn page_and_site<S1, S2>(site: S1, page: S2) -> Self
     where
@@ -52,6 +53,7 @@ impl<'t> PageRef<'t> {
         }
     }
 
+    /// Creates a [`PageRef`] with the given page and no site.
     #[inline]
     pub fn page_only<S>(page: S) -> Self
     where
diff --git a/src/parsing/collect/mod.rs b/src/parsing/collect/mod.rs
index 84a42d8a3..96018cf4f 100644
--- a/src/parsing/collect/mod.rs
+++ b/src/parsing/collect/mod.rs
@@ -35,7 +35,6 @@ mod prelude {
     pub use crate::parsing::prelude::*;
     pub use crate::parsing::rule::Rule;
     pub use crate::parsing::token::{ExtractedToken, Token};
-    pub use crate::text::FullText;
 }
 
 mod consume;
diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs
index 20f54e7c3..3720544dc 100644
--- a/src/parsing/mod.rs
+++ b/src/parsing/mod.rs
@@ -45,7 +45,7 @@ mod prelude {
     };
     pub use crate::settings::WikitextSettings;
     pub use crate::text::FullText;
-    pub use crate::tree::{Element, Elements, OwnedElementsIterator};
+    pub use crate::tree::{Element, Elements};
 }
 
 use self::depth::{process_depths, DepthItem, DepthList};
@@ -74,7 +74,7 @@ pub use self::token::{ExtractedToken, Token};
 
 /// Parse through the given tokens and produce an AST.
 ///
-/// This takes a list of `ExtractedToken` items produced by `tokenize()`.
+/// This takes a list of [`ExtractedToken`] items produced by [`tokenize()`](crate::tokenizer::tokenize).
 pub fn parse<'r, 't>(
     tokenization: &'r Tokenization<'t>,
     page_info: &'r PageInfo<'t>,
@@ -243,8 +243,7 @@ impl NextIndex<TokenIndex> for Incrementer {
     }
 }
 
-// Parse internal result
-
+/// Represents the result of an internal parse.
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct UnstructuredParseResult<'r, 't> {
     /// The returned result from parsing.
diff --git a/src/parsing/paragraph/stack.rs b/src/parsing/paragraph/stack.rs
index 1d829071f..06b5928a6 100644
--- a/src/parsing/paragraph/stack.rs
+++ b/src/parsing/paragraph/stack.rs
@@ -27,7 +27,7 @@ pub struct ParagraphStack<'t> {
     /// Elements being accumulated in the current paragraph.
     current: Vec<Element<'t>>,
 
-    /// Previous elements created, to be outputted in the final `SyntaxTree`.
+    /// Previous elements created, to be output in the final [`SyntaxTree`].
     finished: Vec<Element<'t>>,
 
     /// Gathered errors from paragraph parsing.
@@ -91,6 +91,7 @@ impl<'t> ParagraphStack<'t> {
         }
     }
 
+    /// Creates a paragraph element out of this instance's current elements.
     pub fn build_paragraph(&mut self) -> Option<Element<'t>> {
         debug!(
             "Building paragraph from current stack state (length {})",
@@ -111,6 +112,7 @@ impl<'t> ParagraphStack<'t> {
         Some(element)
     }
 
+    /// Ends the current paragraph, pushing it to this struct's list of finished elements.
     pub fn end_paragraph(&mut self) {
         debug!("Ending the current paragraph to push as a completed element");
 
diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs
index 59b5b34a6..1427ba8b4 100644
--- a/src/parsing/parser.rs
+++ b/src/parsing/parser.rs
@@ -32,6 +32,7 @@ use std::{mem, ptr};
 
 const MAX_RECURSION_DEPTH: usize = 100;
 
+/// Parser for a set of tokens.
 #[derive(Debug, Clone)]
 pub struct Parser<'r, 't> {
     // Page and parse information
@@ -204,7 +205,7 @@ impl<'r, 't> Parser<'r, 't> {
         }
     }
 
-    // Table of Contents
+    /// Adds a heading element to the table of contents.
     pub fn push_table_of_contents_entry(
         &mut self,
         heading: HeadingLevel,
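Taken together, the `Parser` and `parse()` items documented above sit at the center of the crate's pipeline. Below is a minimal sketch of driving that pipeline end to end; the top-level re-exports (`ftml::preprocess`, `ftml::tokenize`, `ftml::parse`), the third `parse()` parameter, the `WikitextMode::Page` variant, and `PageInfo::dummy()` are assumptions from the surrounding crate, not shown in this diff:

```rust
use ftml::data::PageInfo;
use ftml::settings::{WikitextMode, WikitextSettings};

fn main() {
    let mut text = String::from("//Hello,// **world**!");
    let page_info = PageInfo::dummy(); // assumed test helper
    let settings = WikitextSettings::from_mode(WikitextMode::Page);

    // Apply typography substitutions and other pre-lexing transformations.
    ftml::preprocess(&mut text);

    // tokenize() produces the Tokenization that parse() consumes.
    let tokenization = ftml::tokenize(&text);
    let outcome = ftml::parse(&tokenization, &page_info, &settings);

    println!("{:#?}", outcome);
}
```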
diff --git a/src/parsing/rule/impls/block/blocks/later.rs b/src/parsing/rule/impls/block/blocks/later.rs
index 3b15af120..43e21323e 100644
--- a/src/parsing/rule/impls/block/blocks/later.rs
+++ b/src/parsing/rule/impls/block/blocks/later.rs
@@ -25,7 +25,7 @@
 //! (not to be confused with `MiniRecentThreads`) which only
 //! outputted "later." and no other functionality.
 //!
-//! See https://twitter.com/wikidotbugs/status/1328588862218702850
+//! See <https://twitter.com/wikidotbugs/status/1328588862218702850>
 
 use super::prelude::*;
 
diff --git a/src/parsing/rule/impls/block/blocks/mod.rs b/src/parsing/rule/impls/block/blocks/mod.rs
index d278eec68..7865c168e 100644
--- a/src/parsing/rule/impls/block/blocks/mod.rs
+++ b/src/parsing/rule/impls/block/blocks/mod.rs
@@ -20,11 +20,9 @@
 
 mod prelude {
     pub use super::super::{Arguments, BlockRule};
-    pub use crate::parsing::collect::*;
-    pub use crate::parsing::condition::ParseCondition;
     pub use crate::parsing::parser::Parser;
     pub use crate::parsing::prelude::*;
-    pub use crate::parsing::{ParseError, Token};
+    pub use crate::parsing::ParseError;
     pub use crate::tree::{Container, ContainerType, Element};
 
     #[cfg(debug)]
diff --git a/src/parsing/rule/impls/mod.rs b/src/parsing/rule/impls/mod.rs
index 0439257dc..fd354408b 100644
--- a/src/parsing/rule/impls/mod.rs
+++ b/src/parsing/rule/impls/mod.rs
@@ -25,11 +25,10 @@ mod prelude {
     pub use crate::parsing::consume::consume;
     pub use crate::parsing::error::{ParseError, ParseErrorKind};
     pub use crate::parsing::parser::Parser;
-    pub use crate::parsing::result::{ParseResult, ParseSuccess};
+    pub use crate::parsing::result::ParseResult;
     pub use crate::parsing::rule::{LineRequirement, Rule};
     pub use crate::parsing::token::{ExtractedToken, Token};
-    pub use crate::text::FullText;
-    pub use crate::tree::{AttributeMap, Container, ContainerType, Element, Elements};
+    pub use crate::tree::{AttributeMap, ContainerType, Element, Elements};
 }
 
 mod anchor;
diff --git a/src/parsing/rule/mod.rs b/src/parsing/rule/mod.rs
index a7d81ef3d..c644449af 100644
--- a/src/parsing/rule/mod.rs
+++ b/src/parsing/rule/mod.rs
@@ -26,7 +26,7 @@ mod mapping;
 
 pub mod impls;
 
-pub use self::mapping::{get_rules_for_token, RULE_MAP};
+pub use self::mapping::get_rules_for_token;
 
 /// Defines a rule that can possibly match tokens and return an `Element`.
 #[derive(Copy, Clone)]
diff --git a/src/parsing/token/mod.rs b/src/parsing/token/mod.rs
index 9c9d05e88..5cdf87a16 100644
--- a/src/parsing/token/mod.rs
+++ b/src/parsing/token/mod.rs
@@ -38,6 +38,7 @@ use pest::Parser;
 use std::ops::Range;
 use strum_macros::IntoStaticStr;
 
+/// A token extracted from a specific text, with the slice and span it covers.
 #[derive(Serialize, Debug, Clone, PartialEq, Eq)]
 pub struct ExtractedToken<'a> {
     pub token: Token,
@@ -46,6 +47,8 @@ pub struct ExtractedToken<'a> {
 }
 
 impl<'a> ExtractedToken<'a> {
+    /// Returns a new object with the same values, except with the span referring to the
+    /// byte indices the text would have if it were encoded in UTF-16 rather than UTF-8.
     #[must_use]
     pub fn to_utf16_indices(&self, map: &Utf16IndexMap) -> Self {
         // Copy fields
@@ -61,6 +64,8 @@ impl<'a> ExtractedToken<'a> {
     }
 }
 
+/// The type of a parsed token. For a struct with additional context surrounding
+/// the positioning and content of the token, see [`ExtractedToken`].
 #[derive(
     Serialize, Deserialize, Enum, IntoStaticStr, Debug, Copy, Clone, PartialEq, Eq,
 )]
@@ -163,6 +168,10 @@ pub enum Token {
 }
 
 impl Token {
+    /// Extracts all tokens from the given text.
+    ///
+    /// This method itself cannot fail: if something goes wrong with the parsing process,
+    /// the fallback output is a single raw text [`Token`] containing all of the input.
     pub(crate) fn extract_all(text: &str) -> Vec<ExtractedToken> {
         info!("Running lexer on input");
 
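`Token::extract_all` is `pub(crate)`, so the public route to the same token stream is `tokenize()`. A sketch of inspecting its output, assuming `Tokenization` exposes a `tokens()` slice accessor and that `slice` and `span` are public fields alongside `token`:

```rust
fn main() {
    let tokenization = ftml::tokenize("**bold** text");

    for extracted in tokenization.tokens() {
        // Each ExtractedToken pairs a Token with the slice of source
        // text it matched and that slice's byte span in the input.
        println!(
            "{:?}: {:?} at {}..{}",
            extracted.token, extracted.slice, extracted.span.start, extracted.span.end,
        );
    }
}
```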
@@ -196,7 +205,7 @@ impl Token {
         }
     }
 
-    /// Converts a single `Pair` from pest into its corresponding `ExtractedToken`.
+    /// Converts a single [`Pair`] from pest into its corresponding [`ExtractedToken`].
     fn convert_pair(pair: Pair<Rule>) -> ExtractedToken {
         // Extract values from the Pair
         let rule = pair.as_rule();
@@ -212,7 +221,7 @@ impl Token {
         ExtractedToken { token, slice, span }
     }
 
-    /// Mapping of a pest `Rule` to its corresponding `Token` enum.
+    /// Maps each pest [`Rule`] to its corresponding [`Token`].
     fn get_from_rule(rule: Rule) -> Token {
         match rule {
             // Symbols
diff --git a/src/render/html/element/mod.rs b/src/render/html/element/mod.rs
index 57a0ea37d..e9369d77c 100644
--- a/src/render/html/element/mod.rs
+++ b/src/render/html/element/mod.rs
@@ -46,7 +46,7 @@ mod prelude {
     pub use super::super::context::HtmlContext;
     pub use super::super::random::Random;
     pub use super::{render_element, render_elements};
-    pub use crate::tree::{Element, SyntaxTree};
+    pub use crate::tree::Element;
 }
 
 use self::bibliography::{render_bibcite, render_bibliography};
diff --git a/src/render/mod.rs b/src/render/mod.rs
index b44e27813..8af85bc3a 100644
--- a/src/render/mod.rs
+++ b/src/render/mod.rs
@@ -18,6 +18,7 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
+#[allow(unused_imports)]
 mod prelude {
     pub use super::Render;
     pub use crate::data::PageInfo;
diff --git a/src/settings/interwiki.rs b/src/settings/interwiki.rs
index 80248685a..0619506ab 100644
--- a/src/settings/interwiki.rs
+++ b/src/settings/interwiki.rs
@@ -22,9 +22,23 @@ use once_cell::sync::Lazy;
 use std::borrow::Cow;
 use std::collections::HashMap;
 
+/// An [`InterwikiSettings`] instance that has no prefixes.
 pub static EMPTY_INTERWIKI: Lazy<InterwikiSettings> = Lazy::new(|| InterwikiSettings {
     prefixes: hashmap! {},
 });
 
+#[allow(rustdoc::bare_urls)]
+/// An [`InterwikiSettings`] instance that has the default prefixes.
+///
+/// These prefixes are:
+/// - `wikipedia:path` => `https://wikipedia.org/wiki/path`
+/// - `wp:path` => `https://wikipedia.org/wiki/path`
+/// - `commons:path` => `https://commons.wikimedia.org/wiki/path`
+/// - `google:path` => `https://google.com/search?q=path`
+/// - `duckduckgo:path` => `https://duckduckgo.com/?q=path`
+/// - `ddg:path` => `https://duckduckgo.com/?q=path`
+/// - `dictionary:path` => `https://dictionary.com/browse/path`
+/// - `thesaurus:path` => `https://thesaurus.com/browse/path`
 pub static DEFAULT_INTERWIKI: Lazy<InterwikiSettings> = Lazy::new(|| InterwikiSettings {
     prefixes: hashmap! {
         cow!("wikipedia") => cow!("https://wikipedia.org/wiki/$$"),
@@ -38,18 +52,35 @@ pub static DEFAULT_INTERWIKI: Lazy<InterwikiSettings> = Lazy::new(|| InterwikiSe
     },
 });
 
+/// Settings that determine how to turn [interwiki links](http://org.wikidot.com/doc:wiki-syntax#toc21)
+/// into full URLs.
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, Eq)]
 pub struct InterwikiSettings {
     #[serde(flatten)]
+    /// A map from each interwiki prefix to its URL template. A `$$` in the template
+    /// marks where the path specified in the Wikijump interwiki block should go.
     pub prefixes: HashMap<Cow<'static, str>, Cow<'static, str>>,
 }
 
 impl InterwikiSettings {
+    /// Creates a new instance with no prefixes.
     #[inline]
     pub fn new() -> Self {
         InterwikiSettings::default()
     }
 
+    /// Creates a full URL from an interwiki link.
+    /// # Example
+    /// ```
+    /// # use ftml::settings::*;
+    /// assert_eq!(DEFAULT_INTERWIKI.build("wikipedia:Mallard").unwrap(), "https://wikipedia.org/wiki/Mallard");
+    /// ```
+    ///
+    /// Returns [`None`] if:
+    /// - The link starts with a colon
+    /// - There is no colon in the link
+    /// - There is nothing after the colon
+    /// - The interwiki prefix is not found
     pub fn build(&self, link: &str) -> Option<String> {
         match link.find(':') {
             // Starting with a colon is not interwiki, skip.
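A short complement to the doctest above, exercising `build()`'s `Option<String>` return against both statics; the expected URLs follow directly from the prefix table documented on `DEFAULT_INTERWIKI`:

```rust
use ftml::settings::{DEFAULT_INTERWIKI, EMPTY_INTERWIKI};

fn main() {
    // A registered prefix substitutes the path into the URL template at '$$'.
    assert_eq!(
        DEFAULT_INTERWIKI.build("ddg:rust"),
        Some("https://duckduckgo.com/?q=rust".to_string()),
    );

    // An unregistered prefix produces no URL.
    assert_eq!(DEFAULT_INTERWIKI.build("banana:fruit-salad"), None);

    // The empty settings contain no prefixes at all.
    assert_eq!(EMPTY_INTERWIKI.build("wikipedia:Mallard"), None);
}
```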
@@ -145,4 +176,5 @@ fn interwiki_prefixes() {
     check!("thesaurus:oak", Some("https://thesaurus.com/browse/oak"));
     check!("banana:fruit-salad", None);
     check!(":empty", None);
+    check!("no-link:", None);
 }
diff --git a/src/settings/mod.rs b/src/settings/mod.rs
index 3b9eb008e..a03fa0889 100644
--- a/src/settings/mod.rs
+++ b/src/settings/mod.rs
@@ -93,6 +93,7 @@ pub struct WikitextSettings {
 }
 
 impl WikitextSettings {
+    /// Returns the default settings for the given [`WikitextMode`].
     pub fn from_mode(mode: WikitextMode) -> Self {
         let interwiki = DEFAULT_INTERWIKI.clone();
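`from_mode` gives each rendering context its own defaults, so callers pick a mode rather than toggling individual flags. A sketch; the `WikitextMode::Page` variant name is an assumption, since this diff does not show the enum:

```rust
use ftml::settings::{WikitextMode, WikitextSettings};

fn main() {
    // Default settings for rendering an ordinary page, including
    // the default interwiki prefixes cloned above.
    let settings = WikitextSettings::from_mode(WikitextMode::Page);
    let _ = settings;
}
```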
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b720745da..31467466b 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -21,6 +21,7 @@
 use crate::parsing::{ExtractedToken, Token};
 use crate::text::FullText;
 
+/// A list of tokens, together with the text they were generated from.
 #[derive(Debug, Clone)]
 pub struct Tokenization<'t> {
     tokens: Vec<ExtractedToken<'t>>,
diff --git a/src/tree/attribute/safe.rs b/src/tree/attribute/safe.rs
index 4bd77faca..1a3d2accf 100644
--- a/src/tree/attribute/safe.rs
+++ b/src/tree/attribute/safe.rs
@@ -39,7 +39,7 @@ macro_rules! hashset_unicase {
 
 /// List of safe attributes. All others will be filtered out.
 ///
-/// See https://scuttle.atlassian.net/wiki/spaces/WD/pages/1030782977/Allowed+Attributes+in+Wikitext
+/// See <https://scuttle.atlassian.net/wiki/spaces/WD/pages/1030782977/Allowed+Attributes+in+Wikitext>
 pub static SAFE_ATTRIBUTES: Lazy<HashSet<UniCase<&'static str>>> = Lazy::new(|| {
     hashset_unicase![
         "accept",
diff --git a/src/tree/element/object.rs b/src/tree/element/object.rs
index 5abe8b6fd..b83515f0c 100644
--- a/src/tree/element/object.rs
+++ b/src/tree/element/object.rs
@@ -29,6 +29,7 @@ use ref_map::*;
 use std::borrow::Cow;
 use std::num::NonZeroU32;
 
+/// Represents an element to be rendered.
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "kebab-case", tag = "element", content = "data")]
 pub enum Element<'t> {
@@ -367,7 +368,7 @@ impl Element<'_> {
     /// This is to avoid making the call very expensive, but for a complete
     /// understanding of the paragraph requirements, see the `Elements` return.
     ///
-    /// See https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#phrasing_content
+    /// See <https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#phrasing_content>
     pub fn paragraph_safe(&self) -> bool {
         match self {
             Element::Container(container) => container.ctype().paragraph_safe(),
diff --git a/src/tree/partial.rs b/src/tree/partial.rs
index b4b62535d..1c0a04f0c 100644
--- a/src/tree/partial.rs
+++ b/src/tree/partial.rs
@@ -42,7 +42,7 @@ pub enum PartialElement<'t> {
 
     /// Text associated with a Ruby annotation.
     ///
-    /// Outputs HTML `<rt>`. See also https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ruby.
+    /// Outputs HTML `<rt>`. See also <https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ruby>.
     RubyText(RubyText<'t>),
 }
 
diff --git a/src/utf16.rs b/src/utf16.rs
index b3a02e087..85903c4d3 100644
--- a/src/utf16.rs
+++ b/src/utf16.rs
@@ -41,7 +41,7 @@ pub struct Utf16IndexMap<'t> {
 impl<'t> Utf16IndexMap<'t> {
     /// Produces a mapping of UTF-8 byte index to UTF-16 index.
     ///
-    /// This enables objects to be converted into using character indices
+    /// This enables objects to be converted from UTF-8 into UTF-16 using character indices
     /// for strings rather than byte indices. This is useful for environments
     /// which do use UTF-16 strings, such as Javascript (via WebASM).
     pub fn new(text: &'t str) -> Self {
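The `Utf16IndexMap` constructed here pairs with `ExtractedToken::to_utf16_indices()` from earlier in this diff. A sketch of remapping every token span for a UTF-16 consumer such as Javascript; the `ftml::utf16` module path and the `tokens()` accessor are assumptions:

```rust
use ftml::utf16::Utf16IndexMap;

fn main() {
    let text = "déjà vu";
    let tokenization = ftml::tokenize(text);

    // Build the UTF-8 byte index to UTF-16 index mapping for this exact text.
    let map = Utf16IndexMap::new(text);

    // Remap each token's span from UTF-8 byte indices to UTF-16 indices.
    let utf16_tokens: Vec<_> = tokenization
        .tokens()
        .iter()
        .map(|token| token.to_utf16_indices(&map))
        .collect();

    assert_eq!(utf16_tokens.len(), tokenization.tokens().len());
}
```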