From 8990deedc34bad1e0129b14df446b77a235f1b9e Mon Sep 17 00:00:00 2001 From: Thomas Churchman Date: Wed, 1 May 2024 21:29:10 +0200 Subject: [PATCH] fix: generate absolute URL for permalinks Also create a struct for keeping track of site build context. --- src/ctx.rs | 89 +++++++++++++++++++++++++++++++ src/djot.rs | 8 ++- src/main.rs | 54 ++++++++----------- src/render.rs | 145 ++++++++++++++++++++++++++------------------------ src/types.rs | 42 ++++++++++----- src/utils.rs | 43 +++++++++++++-- 6 files changed, 257 insertions(+), 124 deletions(-) create mode 100644 src/ctx.rs diff --git a/src/ctx.rs b/src/ctx.rs new file mode 100644 index 0000000..5181895 --- /dev/null +++ b/src/ctx.rs @@ -0,0 +1,89 @@ +use std::{path::Path, sync::Arc}; + +use crate::{cli::BuildKind, config::SiteConfig, utils}; + +struct InnerCtx { + build_kind: BuildKind, + base_url: String, +} + +/// Site build context. The context is cheap to clone. +#[derive(Clone)] +pub struct Ctx { + inner: Arc, +} + +impl Ctx { + pub fn from_site_config(build_kind: BuildKind, site_config: &SiteConfig) -> Self { + let base_url = if build_kind.is_production() { + &site_config.base_url + } else { + &site_config.base_url_develop + }; + Ctx { + inner: Arc::new(InnerCtx { + build_kind, + base_url: base_url.clone(), + }), + } + } + + pub fn build_kind(&self) -> BuildKind { + self.inner.build_kind + } + + pub fn base_url(&self) -> &str { + &self.inner.base_url + } + + /// Turn a path relative to the output directory into an absolute URL. 
+ pub fn path_to_absolute_url(&self, path: impl AsRef) -> anyhow::Result { + let mut url = utils::path_to_url(Some(self.base_url()), path)?; + if url.ends_with("/index.html") { + url.truncate(url.len() - "/index.html".len()); + } + Ok(url) + } +} + +#[cfg(test)] +mod test { + #[test] + fn path_to_absolute_url() { + use super::{BuildKind, Ctx, SiteConfig}; + use std::path::PathBuf; + + let site_config: SiteConfig = toml::from_str( + r#" + base-url = "http://localhost:8080" + base-url-develop = ".." + trim-url-index = true + "#, + ) + .unwrap(); + let ctx = Ctx::from_site_config(BuildKind::Production, &site_config); + + assert_eq!( + ctx.path_to_absolute_url("").unwrap(), + "http://localhost:8080" + ); + assert_eq!( + ctx.path_to_absolute_url("index.html").unwrap(), + "http://localhost:8080" + ); + assert_eq!( + ctx.path_to_absolute_url(PathBuf::from("a").join("nested").join("file.xml")) + .unwrap(), + "http://localhost:8080/a/nested/file.xml" + ); + assert_eq!( + ctx.path_to_absolute_url(PathBuf::from("a").join("nested").join("index.html")) + .unwrap(), + "http://localhost:8080/a/nested" + ); + assert_eq!( + ctx.path_to_absolute_url("no-extension").unwrap(), + "http://localhost:8080/no-extension" + ); + } +} diff --git a/src/djot.rs b/src/djot.rs index bb527ff..809088c 100644 --- a/src/djot.rs +++ b/src/djot.rs @@ -16,14 +16,12 @@ use crate::{highlight, types, utils}; pub fn rewrite_and_emit_internal_links<'entries>( events: &mut Vec>, entries_by_name: &HashMap<&str, &'entries types::EntryMetaAndFrontMatter<'entries>>, - root_url: &str, ) -> anyhow::Result>> { let mut internal_links = vec![]; fn rewrite_link<'entries>( old_link: &mut Cow<'_, str>, entries_by_name: &HashMap<&str, &'entries types::EntryMetaAndFrontMatter<'entries>>, - root_url: &str, ) -> anyhow::Result>> { if &old_link[0..2] == "~/" { let (link, anchor) = match old_link.find('#') { @@ -32,7 +30,7 @@ pub fn rewrite_and_emit_internal_links<'entries>( }; if let Some(entry) = entries_by_name.get(link) { 
- *old_link = Cow::Owned(format!("{root_url}/{}{}", &entry.meta.permalink, anchor)); + *old_link = Cow::Owned(format!("{}{}", &entry.meta.permalink, anchor)); return Ok(Some(entry)); } else { anyhow::bail!("Unknown internal link: {old_link}"); @@ -45,12 +43,12 @@ pub fn rewrite_and_emit_internal_links<'entries>( for event in events { match event { Event::Start(Container::Link(link, _), _) => { - if let Some(entry) = rewrite_link(link, entries_by_name, root_url)? { + if let Some(entry) = rewrite_link(link, entries_by_name)? { internal_links.push(entry); } } Event::End(Container::Link(link, _)) => { - rewrite_link(link, entries_by_name, root_url)?; + rewrite_link(link, entries_by_name)?; } _ => {} } diff --git a/src/main.rs b/src/main.rs index e36cc3d..b4349b2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,6 +10,7 @@ use notify_debouncer_full::{new_debouncer, notify::*, DebounceEventResult}; mod cli; mod config; +mod ctx; mod djot; mod front_matter; mod highlight; @@ -19,10 +20,9 @@ mod render; mod types; mod utils; +use ctx::Ctx; use out::Out; -use crate::utils::path_to_http_url; - #[derive(Debug)] struct Group { name: String, @@ -45,6 +45,7 @@ impl Group { } fn collect_entries<'a>( + ctx: &'a Ctx, path_prefix: &'a Path, path: &Path, ) -> impl Iterator> + 'a { @@ -59,7 +60,11 @@ fn collect_entries<'a>( if entry.file_type().is_dir() || !name.ends_with(".dj") { None } else { - Some(types::EntryMeta::entry_from_path(path_prefix, entry.path())) + Some(types::EntryMeta::entry_from_path( + ctx, + path_prefix, + entry.path(), + )) } } Err(err) => Some(Err(err.into())), @@ -67,6 +72,7 @@ fn collect_entries<'a>( } fn collect_entry_groups( + ctx: &Ctx, path: impl AsRef, ) -> anyhow::Result<(Vec, Vec)> { let path = path.as_ref(); @@ -93,7 +99,7 @@ fn collect_entry_groups( if group.file_type().is_dir() { let start_idx = entries.len(); - for entry in collect_entries(path, walk_path) { + for entry in collect_entries(ctx, path, walk_path) { entries.push(entry?); } let end_idx = 
entries.len(); @@ -104,15 +110,10 @@ fn collect_entry_groups( anyhow::Ok((groups, entries)) } -fn build( - path: &Path, - build_kind: cli::BuildKind, - renderer: &render::Renderer, - base_url: &str, -) -> anyhow::Result<()> { +fn build(ctx: &Ctx, path: &Path, renderer: &render::Renderer) -> anyhow::Result<()> { let out = Out::at("./out")?; - let (groups, entries) = collect_entry_groups(path.join("entries"))?; + let (groups, entries) = collect_entry_groups(&ctx, path.join("entries"))?; log::info!("Found {} entry group(s):", groups.len()); for group in groups.iter() { @@ -180,7 +181,7 @@ fn build( }); // When in production-mode, filter out non-released entries - let (groups, entries, mut parsed, front_matter) = if build_kind.is_production() { + let (groups, entries, mut parsed, front_matter) = if ctx.build_kind().is_production() { let before = entries.len(); let mut groups = groups; @@ -246,7 +247,7 @@ fn build( .enumerate() .map(|(linker_idx, parsed)| { let internal_links = - djot::rewrite_and_emit_internal_links(parsed, &entries_by_name, base_url)?; + djot::rewrite_and_emit_internal_links(parsed, &entries_by_name)?; let mut linkee_indices = internal_links .into_iter() @@ -382,9 +383,11 @@ fn build( }; let page_permalink = { let out_file = out_file.clone(); + // can we do away with this clone? 
+ let ctx = ctx.clone(); move |page| -> String { let path = out_file(page); - path_to_http_url(path).unwrap() + ctx.path_to_absolute_url(path).unwrap() } }; @@ -533,23 +536,16 @@ fn main() -> anyhow::Result<()> { }; let site_config = site_config.as_ref().unwrap(); - let base_url = if build_kind.is_production() { - &site_config.base_url - } else { - &site_config.base_url_develop - }; + let ctx = Ctx::from_site_config(build_kind, site_config); if config_changed || matches!(change, FsChange::Template) { log::info!("Reloading templates…"); - renderer = Some(render::Renderer::build( - base_url.to_owned(), - args.path.join("templates"), - )?); + renderer = Some(render::Renderer::build(&ctx, args.path.join("templates"))?); } log::info!("Building…"); let instant = std::time::Instant::now(); - if let Err(err) = build(&args.path, build_kind, renderer.as_ref().unwrap(), base_url) { + if let Err(err) = build(&ctx, &args.path, renderer.as_ref().unwrap()) { log::error!("{:?}", err); } log::info!( @@ -579,13 +575,9 @@ fn main() -> anyhow::Result<()> { } else { let site_config: config::SiteConfig = toml::from_str(&std::fs::read_to_string(&site_config_path)?)?; - let base_url = if build_kind.is_production() { - &site_config.base_url - } else { - &site_config.base_url_develop - }; - let renderer = render::Renderer::build(base_url.to_owned(), args.path.join("templates"))?; - build(&args.path, build_kind, &renderer, base_url)?; + let ctx = Ctx::from_site_config(build_kind, &site_config); + let renderer = render::Renderer::build(&ctx, args.path.join("templates"))?; + build(&ctx, &args.path, &renderer)?; } Ok(()) diff --git a/src/render.rs b/src/render.rs index 5241da4..42b54c7 100644 --- a/src/render.rs +++ b/src/render.rs @@ -5,7 +5,8 @@ use std::{ path::{Path, PathBuf}, }; -use crate::{types, utils::path_to_http_url}; +use crate::types; +use crate::Ctx; thread_local! 
{ // There is one Renderer instance for rendering, which has a `minijinja::Environment` where a @@ -26,7 +27,7 @@ struct Paginator { } impl Paginator { - pub fn new(per_page: u32, item_count: usize) -> Self { + pub fn new(ctx: &Ctx, per_page: u32, item_count: usize) -> Self { PAGE_PERMALINK.with_borrow(|page_permalink| { let page_permalink = page_permalink.as_ref().unwrap(); @@ -37,7 +38,7 @@ impl Paginator { current_page: 0, last_page, page_permalinks: (0u32..=last_page) - .map(|page| path_to_http_url((*page_permalink)(page)).unwrap()) + .map(|page| ctx.path_to_absolute_url((*page_permalink)(page)).unwrap()) .collect(), } }) @@ -60,68 +61,72 @@ fn pagination_reset() { /// /// The first call per template render sets up the paginator. Subsequent calls ignore the arguments /// and return the same result. -fn paginate(items: &minijinja::Value, per_page: u32) -> Result { - PAGINATOR.with_borrow_mut(|paginator| { - if paginator.is_none() { - let item_count = if items.is_number() { - usize::try_from(items.clone()).ok() - } else if let Some(seq) = items.as_seq() { - Some(seq.item_count()) - } else { - None - }; - let item_count = item_count.ok_or(minijinja::Error::new( - minijinja::ErrorKind::InvalidOperation, - "`items` argument is neither a number nor a sequence", - ))?; - - *paginator = Some(Paginator::new(per_page, item_count)); - } - let paginator = paginator.as_ref().unwrap(); - - let page_start = paginator.current_page as usize * paginator.per_page as usize; - let page_end = (page_start + paginator.per_page as usize).min(paginator.item_count); - - let is_first_page = paginator.current_page == 0; - let is_last_page = paginator.current_page == paginator.last_page; - - Ok(minijinja::context! 
{ - item_count => paginator.item_count, - page_count => paginator.last_page + 1, - current_page => paginator.current_page, - indices => (page_start..page_end).collect::>(), - is_first_page => is_first_page, - is_last_page => is_last_page, - previous => if is_first_page { - None - } else { - Some(paginator.page_permalinks[(paginator.current_page-1) as usize].clone()) - }, - next => if is_last_page { - None - } else { - Some(paginator.page_permalinks[(paginator.current_page+1) as usize].clone()) - }, - page_permalinks => paginator.page_permalinks, +fn gen_paginate( + ctx: Ctx, +) -> impl Fn(&minijinja::Value, u32) -> Result { + move |items, per_page| { + PAGINATOR.with_borrow_mut(|paginator| { + if paginator.is_none() { + let item_count = if items.is_number() { + usize::try_from(items.clone()).ok() + } else if let Some(seq) = items.as_seq() { + Some(seq.item_count()) + } else { + None + }; + let item_count = item_count.ok_or(minijinja::Error::new( + minijinja::ErrorKind::InvalidOperation, + "`items` argument is neither a number nor a sequence", + ))?; + + *paginator = Some(Paginator::new(&ctx, per_page, item_count)); + } + let paginator = paginator.as_ref().unwrap(); + + let page_start = paginator.current_page as usize * paginator.per_page as usize; + let page_end = (page_start + paginator.per_page as usize).min(paginator.item_count); + + let is_first_page = paginator.current_page == 0; + let is_last_page = paginator.current_page == paginator.last_page; + + Ok(minijinja::context! 
{ + item_count => paginator.item_count, + page_count => paginator.last_page + 1, + current_page => paginator.current_page, + indices => (page_start..page_end).collect::>(), + is_first_page => is_first_page, + is_last_page => is_last_page, + previous => if is_first_page { + None + } else { + Some(paginator.page_permalinks[(paginator.current_page-1) as usize].clone()) + }, + next => if is_last_page { + None + } else { + Some(paginator.page_permalinks[(paginator.current_page+1) as usize].clone()) + }, + page_permalinks => paginator.page_permalinks, + }) }) - }) + } } pub struct Renderer { - base_url: String, + ctx: Ctx, t: Environment<'static>, } #[derive(Clone, Copy, serde::Serialize)] -struct Ctx<'ctx> { +struct TemplateCtx<'ctx> { base_url: &'ctx str, entries: &'ctx HashMap<&'ctx str, &'ctx [types::Entry<'ctx>]>, } #[derive(Clone, Copy)] -pub struct RenderContext<'ctx> { +pub struct RenderCtx<'ctx> { renderer: &'ctx Renderer, - ctx: Ctx<'ctx>, + ctx: TemplateCtx<'ctx>, } /// Minijinja filter to add leading zeros to a numeric value. 
@@ -134,29 +139,24 @@ fn leading_zeros(val: minijinja::Value, leading_zeros: u8) -> Result) -> anyhow::Result { + pub fn build(ctx: &Ctx, template_path: impl AsRef) -> anyhow::Result { let mut t = Environment::new(); t.set_undefined_behavior(minijinja::UndefinedBehavior::Chainable); - t.add_function("paginate", paginate); + t.add_function("paginate", gen_paginate(ctx.clone())); t.add_filter("leading_zeros", leading_zeros); { - let root_url = base_url.clone(); + let ctx = ctx.clone(); t.add_filter( "path_to_url", move |path: ViaDeserialize| -> Result { - let mut url = String::new(); - url.push_str(&root_url); - for part in path.iter() { - url.push('/'); - url.push_str(part.to_str().ok_or_else(|| { - minijinja::Error::new( - minijinja::ErrorKind::InvalidOperation, - format!("path is not valid unicode: {:?}", &path.0), - ) - })?); - } + let url = ctx.path_to_absolute_url(&*path).map_err(|_| { + minijinja::Error::new( + minijinja::ErrorKind::InvalidOperation, + format!("path is not valid unicode: {:?}", &path.0), + ) + })?; Ok(url) }, @@ -165,24 +165,27 @@ impl Renderer { t.set_loader(minijinja::path_loader(template_path)); - Ok(Renderer { base_url, t }) + Ok(Renderer { + ctx: ctx.clone(), + t, + }) } pub fn render_context<'ctx>( &'ctx self, entries: &'ctx HashMap<&'ctx str, &'ctx [types::Entry<'ctx>]>, - ) -> RenderContext<'ctx> { - RenderContext { + ) -> RenderCtx<'ctx> { + RenderCtx { renderer: self, - ctx: Ctx { - base_url: &self.base_url, + ctx: TemplateCtx { + base_url: &self.ctx.base_url(), entries, }, } } } -impl RenderContext<'_> { +impl RenderCtx<'_> { pub fn entry( &self, write: impl std::io::Write, diff --git a/src/types.rs b/src/types.rs index 24457a2..f2f6190 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,9 +1,11 @@ use anyhow::anyhow; use std::{ - borrow::Cow, collections::HashMap, path::{Path, PathBuf} + borrow::Cow, + collections::HashMap, + path::{Path, PathBuf}, }; -use crate::utils; +use crate::{utils, Ctx}; #[derive(Clone, Copy, Debug, 
PartialEq, Eq, serde::Serialize)] pub struct Date { @@ -155,7 +157,7 @@ pub struct Entry<'m> { } impl EntryMeta { - pub fn entry_from_path(path_prefix: &Path, path: &Path) -> anyhow::Result { + pub fn entry_from_path(ctx: &Ctx, path_prefix: &Path, path: &Path) -> anyhow::Result { let mut path_without_prefix = path.strip_prefix(path_prefix)?.iter(); let group = path_without_prefix .next() @@ -190,12 +192,16 @@ impl EntryMeta { } else { Cow::Owned(path.with_extension("")) }; - utils::path_to_http_url(path)? + utils::path_to_url(None, path)? }; let (dt, slug) = file_name_into_date_and_slug(&file_name); if let Some(dt) = dt { let (date, time) = dt; + let out_file = PathBuf::from(format!("{}", date.year)) + .join(slug) + .join("index.html"); + let out_asset_dir = PathBuf::from(format!("{}", date.year)).join(slug); Ok(EntryMeta { sort_key: file_name.to_owned(), group, @@ -205,25 +211,35 @@ impl EntryMeta { file_path: path.to_owned(), asset_dir: parent_dir.to_owned(), canonical_name, - out_file: PathBuf::from(format!("{}", date.year)).join(format!("{slug}.html")), - out_asset_dir: PathBuf::from(format!("{}", date.year)).join(slug), - asset_url: format!("{}/{slug}", date.year), - permalink: format!("{}/{slug}.html", date.year), + permalink: ctx.path_to_absolute_url(&out_file).expect("valid path"), + asset_url: ctx + .path_to_absolute_url(&out_asset_dir) + .expect("valid path"), + out_file, + out_asset_dir, }) } else { + let out_file = if slug == "index" { + PathBuf::from(format!("{slug}.html")) + } else { + PathBuf::from(slug).join("index.html") + }; + let out_asset_dir = PathBuf::from(slug); Ok(EntryMeta { sort_key: file_name.to_owned(), group, - out_asset_dir: PathBuf::from(&file_name), - out_file: PathBuf::from(format!("{slug}.html")), date: None, time: None, slug: slug.to_owned(), - canonical_name, file_path: path.to_owned(), asset_dir: parent_dir.to_owned(), - asset_url: format!("{slug}"), - permalink: format!("{slug}.html"), + canonical_name, + permalink: 
ctx.path_to_absolute_url(&out_file).expect("valid path"), + asset_url: ctx + .path_to_absolute_url(&out_asset_dir) + .expect("valid path"), + out_file, + out_asset_dir, }) } } diff --git a/src/utils.rs b/src/utils.rs index 8292246..1672f3d 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,17 +1,32 @@ use anyhow::anyhow; use std::path::Path; -pub fn path_to_http_url(path: impl AsRef) -> anyhow::Result { +/// Turn a path into a URL with a given prefix. If a scheme and host is given, the path becomes an +/// absolute URL. +pub fn path_to_url( + scheme_and_host: Option<&str>, + path: impl AsRef, +) -> anyhow::Result { let path = path.as_ref(); - let mut builder = String::new(); + + // allocate roughly enough for the resulting string + let mut builder = String::with_capacity( + (scheme_and_host.map(|s| s.len() + 1).unwrap_or(0) + + path.into_iter().map(|p| p.len()).sum::()) + .next_power_of_two(), + ); + if let Some(s) = scheme_and_host { + builder.push_str(s); + } for (idx, part) in path.into_iter().enumerate() { - if idx > 0 { + if idx > 0 || scheme_and_host.is_some() { builder.push('/'); } builder.push_str(part.to_str().ok_or(anyhow!("expected UTF-8 path"))?); } + builder.shrink_to_fit(); Ok(builder) } @@ -25,7 +40,7 @@ pub fn path_to_http_url(path: impl AsRef) -> anyhow::Result { /// `None`. 
/// /// # Examples -/// +/// /// ```rust /// let inner_iter = vec![Ok(1), Ok(2), Err(42), Ok(4)].into_iter(); /// let mut resulting_iter = @@ -116,6 +131,26 @@ pub fn process_results_iter< #[cfg(test)] mod test { + #[test] + fn path_to_url() { + use super::path_to_url; + use std::path::PathBuf; + + assert_eq!(path_to_url(None, "index.html").unwrap(), "index.html"); + assert_eq!( + path_to_url(Some("https://example.com"), "index.html").unwrap(), + "https://example.com/index.html" + ); + assert_eq!( + path_to_url( + Some("https://example.com"), + PathBuf::from("nested").join("file.xml") + ) + .unwrap(), + "https://example.com/nested/file.xml" + ); + } + #[test] pub fn process_results_iter() { use super::process_results_iter;