From de9338fe3fe009c51542d9a09c7a3c8fa0f929ed Mon Sep 17 00:00:00 2001 From: Yuriy Larin Date: Wed, 7 Feb 2024 17:07:27 +0200 Subject: [PATCH 1/3] refactored resources code --- src/bin/mdbook-epub.rs | 19 +- src/errors.rs | 63 +++ src/generator.rs | 12 +- src/lib.rs | 83 +--- src/resources.rs | 923 +------------------------------------ src/resources/asset.rs | 152 ++++++ src/resources/resources.rs | 712 ++++++++++++++++++++++++++++ src/resources/retrieve.rs | 124 +++++ tests/integration_tests.rs | 25 +- 9 files changed, 1097 insertions(+), 1016 deletions(-) create mode 100644 src/errors.rs create mode 100644 src/resources/asset.rs create mode 100644 src/resources/resources.rs create mode 100644 src/resources/retrieve.rs diff --git a/src/bin/mdbook-epub.rs b/src/bin/mdbook-epub.rs index 4b8900310..8e6accb2d 100644 --- a/src/bin/mdbook-epub.rs +++ b/src/bin/mdbook-epub.rs @@ -1,19 +1,20 @@ -use ::env_logger; #[macro_use] extern crate log; -use ::mdbook; -use ::mdbook_epub; -use ::serde_json; -use ::structopt; -use mdbook::renderer::RenderContext; -use mdbook::MDBook; use std::io; use std::path::PathBuf; use std::process; + +use ::env_logger; +use ::mdbook; +use ::serde_json; +use ::structopt; +use mdbook::MDBook; +use mdbook::renderer::RenderContext; use structopt::StructOpt; -use mdbook_epub::Error; +use ::mdbook_epub; +use mdbook_epub::errors::Error; fn main() { env_logger::init(); @@ -50,7 +51,7 @@ fn run(args: &Args) -> Result<(), Error> { println!("Running mdbook-epub as plugin..."); serde_json::from_reader(io::stdin()).map_err(|_| Error::RenderContext)? 
}; - + // calling the main code for epub creation mdbook_epub::generate(&ctx)?; info!( "Book is READY in directory: '{}'", diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 000000000..97972ffab --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,63 @@ +use thiserror::Error; +use std::path::PathBuf; + +#[derive(Error, Debug)] +pub enum Error { + #[error("Incompatible mdbook version got {0} expected {1}")] + IncompatibleVersion(String, String), + + #[error("{0}")] + EpubDocCreate(String), + + #[error("Could not parse the template")] + TemplateParse, + + #[error("Content file was not found: \'{0}\'")] + ContentFileNotFound(String), + + #[error("{0}")] + AssetFileNotFound(String), + + #[error("Asset was not a file {0}")] + AssetFile(PathBuf), + + #[error("Could not open css file {0}")] + CssOpen(PathBuf), + + #[error("Unable to open template {0}")] + OpenTemplate(PathBuf), + + #[error("Unable to parse render context")] + RenderContext, + + #[error("Unable to open asset")] + AssetOpen, + + #[error("Error reading stylesheet")] + StylesheetRead, + + #[error("epubcheck has failed: {0}")] + EpubCheck(String), + + #[error(transparent)] + Io(#[from] std::io::Error), + + #[error(transparent)] + Book(#[from] mdbook::errors::Error), + #[error(transparent)] + Semver(#[from] semver::Error), + #[error(transparent)] + EpubBuilder(#[from] eyre::Report), + #[error(transparent)] + Render(#[from] handlebars::RenderError), + #[error(transparent)] + TomlDeser(#[from] toml::de::Error), + #[error(transparent)] + HttpError(#[from] Box<ureq::Error>), +} + +impl From<ureq::Error> for Error { + fn from(e: ureq::Error) -> Self { + Error::HttpError(Box::new(e)) + } +} diff --git a/src/generator.rs b/src/generator.rs index 5043e155b..cec2fa9e6 100644 --- a/src/generator.rs +++ b/src/generator.rs @@ -13,14 +13,15 @@ use handlebars::{Handlebars, RenderError}; use html_parser::{Dom, Node}; use mdbook::book::{BookItem, Chapter}; use mdbook::renderer::RenderContext; -use pulldown_cmark::{html, CowStr, Event, Tag}; 
+use pulldown_cmark::{CowStr, Event, html, Tag}; use url::Url; use crate::config::Config; -use crate::resources::handler::{ContentRetriever, ResourceHandler}; -use crate::resources::{self, Asset, AssetKind}; +use crate::resources::retrieve::{ContentRetriever, ResourceHandler}; +use crate::resources::resources::{self}; use crate::DEFAULT_CSS; -use crate::{utils, Error}; +use crate::{Error, utils}; +use crate::resources::asset::{Asset, AssetKind}; /// The actual EPUB book renderer. pub struct Generator<'a> { @@ -561,7 +562,8 @@ mod tests { use std::path::Path; use super::*; - use crate::resources::{handler::MockContentRetriever, AssetKind}; + use crate::resources::asset::AssetKind; + use crate::resources::retrieve::MockContentRetriever; #[test] fn load_assets() { diff --git a/src/lib.rs b/src/lib.rs index 5530e38d1..a752a2b1f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,94 +1,37 @@ //! A `mdbook` backend for generating a book in the `EPUB` format. -use ::handlebars; -use ::thiserror::Error; #[macro_use] extern crate log; -use ::mdbook; -use ::semver; #[macro_use] extern crate serde_derive; #[macro_use] extern crate serde_json; +use std::fs::{create_dir_all, File}; +use std::path::{Path, PathBuf}; + +use ::mdbook; +use ::semver; +use ::thiserror::Error; use mdbook::config::Config as MdConfig; use mdbook::renderer::RenderContext; use semver::{Version, VersionReq}; -use std::fs::{create_dir_all, File}; -use std::path::{Path, PathBuf}; + +use errors::Error; + +pub use crate::config::Config; +pub use crate::generator::Generator; + mod config; +pub mod errors; mod generator; mod resources; mod utils; -pub use crate::config::Config; -pub use crate::generator::Generator; - /// The default stylesheet used to make the rendered document pretty. 
pub const DEFAULT_CSS: &str = include_str!("master.css"); -#[derive(Error, Debug)] -pub enum Error { - #[error("Incompatible mdbook version got {0} expected {1}")] - IncompatibleVersion(String, String), - - #[error("{0}")] - EpubDocCreate(String), - - #[error("Could not parse the template")] - TemplateParse, - - #[error("Content file was not found: \'{0}\'")] - ContentFileNotFound(String), - - #[error("{0}")] - AssetFileNotFound(String), - - #[error("Asset was not a file {0}")] - AssetFile(PathBuf), - - #[error("Could not open css file {0}")] - CssOpen(PathBuf), - - #[error("Unable to open template {0}")] - OpenTemplate(PathBuf), - - #[error("Unable to parse render context")] - RenderContext, - - #[error("Unable to open asset")] - AssetOpen, - - #[error("Error reading stylesheet")] - StylesheetRead, - - #[error("epubcheck has failed: {0}")] - EpubCheck(String), - - #[error(transparent)] - Io(#[from] std::io::Error), - - #[error(transparent)] - Book(#[from] mdbook::errors::Error), - #[error(transparent)] - Semver(#[from] semver::Error), - #[error(transparent)] - EpubBuilder(#[from] eyre::Report), - #[error(transparent)] - Render(#[from] handlebars::RenderError), - #[error(transparent)] - TomlDeser(#[from] toml::de::Error), - #[error(transparent)] - HttpError(#[from] Box<ureq::Error>), -} - -impl From<ureq::Error> for Error { - fn from(e: ureq::Error) -> Self { - Error::HttpError(Box::new(e)) - } -} - /// The exact version of `mdbook` this crate is compiled against. 
pub const MDBOOK_VERSION: &str = mdbook::MDBOOK_VERSION; diff --git a/src/resources.rs b/src/resources.rs index c3723cfa2..ee3cc0821 100644 --- a/src/resources.rs +++ b/src/resources.rs @@ -1,920 +1,3 @@ -use const_format::concatcp; -use html_parser::{Dom, Node, Element}; -use mdbook::book::BookItem; -use mdbook::renderer::RenderContext; -use mime_guess::Mime; -use pulldown_cmark::{Event, Tag}; -use std::collections::HashMap; -use std::path::{Path, PathBuf, MAIN_SEPARATOR_STR}; -use url::Url; - -use crate::{utils, Error}; - -// Internal constants for reveling 'upper folder' paths in resource links inside MD -const UPPER_PARENT: &str = concatcp!("..", MAIN_SEPARATOR_STR); -const UPPER_PARENT_LINUX: &str = concatcp!("..", "/"); -const UPPER_PARENT_STARTS_SLASH: &str = concatcp!(MAIN_SEPARATOR_STR, "..", MAIN_SEPARATOR_STR); -const UPPER_PARENT_STARTS_SLASH_LINUX: &str = concatcp!("/", "..", "/"); - -#[cfg(not(target_os = "windows"))] -const UPPER_FOLDER_PATHS: &[&str] = &[MAIN_SEPARATOR_STR, UPPER_PARENT, UPPER_PARENT_LINUX]; - -#[cfg(target_os = "windows")] -const UPPER_FOLDER_PATHS: &[&str] = &[&"/", MAIN_SEPARATOR_STR, UPPER_PARENT, UPPER_PARENT_LINUX]; - -/// Find all resources in book and put them into HashMap. -/// The key is a link, value is a composed Asset -pub(crate) fn find(ctx: &RenderContext) -> Result, Error> { - let mut assets: HashMap = HashMap::new(); - debug!("Finding resources by:\n{:?}", ctx.config); - let src_dir = ctx.root.join(&ctx.config.book.src).canonicalize()?; - - debug!( - "Start iteration over a [{:?}] sections in src_dir = {:?}", - ctx.book.sections.len(), - src_dir - ); - for section in ctx.book.iter() { - match *section { - BookItem::Chapter(ref ch) => { - let mut assets_count = 0; - debug!("Searching links and assets for: '{}'", ch); - if ch.path.is_none() { - debug!("'{}' is a draft chapter and should be no content.", ch.name); - continue; - } - for link in find_assets_in_markdown(&ch.content)? 
{ - let asset = match Url::parse(&link) { - Ok(url) => Asset::from_url(url, &ctx.destination), - Err(_) => Asset::from_local(&link, &src_dir, ch.path.as_ref().unwrap()), - }?; - - // that is CORRECT generation way - debug!( - "Check relative path assets for: '{}' for {:?}", - ch.name, asset - ); - match asset.source { - // local asset kind - AssetKind::Local(_) => { - let relative = asset.location_on_disk.strip_prefix(&src_dir); - match relative { - Ok(relative_link_path) => { - let link_key: String = - String::from(relative_link_path.to_str().unwrap()); - if let std::collections::hash_map::Entry::Vacant(e) = - assets.entry(link_key.to_owned()) - { - debug!( - "Adding asset by link '{:?}' : {:#?}", - link_key, &asset - ); - e.insert(asset); - assets_count += 1; - } else { - debug!("Skipped asset for '{}'", link_key); - } - } - _ => { - // skip incorrect resource/image link outside of book /SRC/ folder - warn!("Sorry, we can't add 'Local asset' that is outside of book's /src/ folder, {:?}", &asset); - } - } - } - AssetKind::Remote(_) => { - // remote asset kind - let link_key: String = - String::from(asset.location_on_disk.to_str().unwrap()); - debug!( - "Adding Remote asset by link '{:?}' : {:#?}", - link_key, &asset - ); - assets.insert(link_key, asset); - assets_count += 1; - } - }; - } - debug!( - "Found '{}' links and assets inside '{}'", - assets_count, ch.name - ); - } - BookItem::Separator => trace!("Skip separator."), - BookItem::PartTitle(ref title) => trace!("Skip part title: {}.", title), - } - } - debug!("Added '{}' links and assets in total", assets.len()); - Ok(assets) -} - -/// The type of asset, remote or local -#[derive(Clone, PartialEq, Debug)] -pub(crate) enum AssetKind { - Remote(Url), - Local(PathBuf), -} - -#[derive(Clone, PartialEq, Debug)] -pub(crate) struct Asset { - /// The asset's absolute location on disk. - pub(crate) location_on_disk: PathBuf, - /// The local asset's filename relative to the `src/` or `src/assets` directory. 
- /// If it's a remote asset it's relative to the destination where the book generated. - pub(crate) filename: PathBuf, - pub(crate) mimetype: Mime, - /// The asset's original link as a enum [local][AssetKind::Local] or [remote][AssetKind::Remote]. - pub(crate) source: AssetKind, -} - -impl Asset { - pub(crate) fn new(filename: P, absolute_location: Q, source: K) -> Self - where - P: Into, - Q: Into, - K: Into, - { - let location_on_disk = absolute_location.into(); - let mt = mime_guess::from_path(&location_on_disk).first_or_octet_stream(); - let source = source.into(); - Self { - location_on_disk, - filename: filename.into(), - mimetype: mt, - source, - } - } - - // Create Asset by using remote Url, destination path is used for composing path - fn from_url(url: Url, dest_dir: &Path) -> Result { - trace!("Extract from URL: {:#?} into folder = {:?}", url, dest_dir); - let filename = utils::hash_link(&url); - let dest_dir = utils::normalize_path(dest_dir); - let full_filename = dest_dir.join(filename); - // Will fetch assets to normalized path later. fs::canonicalize() only works for existed path. 
- let absolute_location = utils::normalize_path(full_filename.as_path()); - let filename = absolute_location.strip_prefix(dest_dir).unwrap(); - let asset = Asset::new(filename, &absolute_location, AssetKind::Remote(url)); - debug!("Created from URL: {:#?}", asset); - Ok(asset) - } - - // Create Asset by using local link, source and Chapter path are used for composing fields - fn from_local(link: &str, src_dir: &Path, chapter_path: &Path) -> Result { - debug!( - "Composing asset path for {:?} + {:?} in chapter = {:?}", - src_dir, link, chapter_path - ); - let chapter_path = src_dir.join(chapter_path); - - // compose file name by it's link and chapter path - let stripped_path = Self::compute_asset_path_by_src_and_link(link, &chapter_path); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - debug!("Composing full_filename by '{:?}' + '{:?}'", &stripped_path, &normalized_link.clone()); - let full_filename = stripped_path.join(normalized_link); // compose final result - - debug!("Joined full_filename = {:?}", &full_filename.display()); - let absolute_location = full_filename.canonicalize().map_err(|this_error| { - Error::AssetFileNotFound(format!( - "Asset was not found: '{link}' by '{}', error = {}", - &full_filename.display(), - this_error - )) - })?; - if !absolute_location.is_file() || absolute_location.is_symlink() { - return Err(Error::AssetFile(absolute_location)); - } - // Use filename as embedded file path with content from absolute_location. 
- let binding = utils::normalize_path(Path::new(link)); - debug!("Extracting file name from = {:?}, binding = '{binding:?}'", &full_filename.display()); - let filename = if cfg!(target_os = "windows") { - binding.as_os_str().to_os_string() - .into_string().expect("Error getting filename for Local Asset").replace('\\', "/") - } else { - String::from(binding.as_path().to_str().unwrap()) - }; - - let asset = Asset::new( - filename, - &absolute_location, - AssetKind::Local(PathBuf::from(link)), - ); - trace!( - "[{:#?}] = {:?} : {:?}", - asset.source, - asset.filename, - asset.location_on_disk - ); - debug!("Created from local: {:#?}", asset); - Ok(asset) - } - - // Analyses input 'link' and stripes chapter's path to shorter link - // can pop one folder above the book's src or above an internal sub folder - // 'link' is stripped too for one upper folder on one call - fn compute_asset_path_by_src_and_link(link: &str, chapter_dir: &PathBuf) -> PathBuf { - let mut reassigned_asset_root: PathBuf = PathBuf::from(chapter_dir); - let link_string = String::from(link); - // if chapter is a MD file, remove if from path - if chapter_dir.is_file() { - reassigned_asset_root.pop(); - } - trace!("check if parent present by '{}' = '{}' || '{}' || '{}'", - link_string, MAIN_SEPARATOR_STR, UPPER_PARENT, UPPER_PARENT_STARTS_SLASH); - // if link points to upper folder - if !link_string.is_empty() - && (link_string.starts_with(MAIN_SEPARATOR_STR) - || link_string.starts_with(UPPER_PARENT_LINUX) - || link_string.starts_with(UPPER_PARENT) - || link_string.starts_with(UPPER_PARENT_STARTS_SLASH) - || link_string.starts_with(UPPER_PARENT_STARTS_SLASH_LINUX)) - { - reassigned_asset_root.pop(); // remove an one folder from asset's path - // make a recursive call - let new_link = Self::remove_prefixes(link_string, UPPER_FOLDER_PATHS); - reassigned_asset_root = Self::compute_asset_path_by_src_and_link(&new_link, &reassigned_asset_root); - } - reassigned_asset_root // compose final result - } - - // 
Strip input link by prefixes from &str array - // return 'shorter' result or the same - fn remove_prefixes(link_to_strip: String, prefixes: &[&str]) -> String { - let mut stripped_link = link_to_strip.clone(); - for prefix in prefixes { - match link_to_strip.strip_prefix(prefix) { - Some(s) => { - stripped_link = s.to_string(); - return stripped_link - }, - None => &link_to_strip - }; - }; - stripped_link - } -} - -// Look up resources in nested HTML element -fn find_assets_in_nested_html_tags(element: &Element) -> Result, Error> { - let mut found_asset = Vec::new(); - - if element.name == "img" { - if let Some(dest) = &element.attributes["src"] { - found_asset.push(dest.clone()); - } - } - for item in &element.children { - if let Node::Element(ref nested_element) = item { - found_asset.extend(find_assets_in_nested_html_tags(nested_element)?.into_iter()); - } - } - - Ok(found_asset) -} - -// Look up resources in chapter md content -fn find_assets_in_markdown(chapter_src_content: &str) -> Result, Error> { - let mut found_asset = Vec::new(); - - let pull_down_parser = utils::create_new_pull_down_parser(chapter_src_content); - // that will process chapter content and find assets - for event in pull_down_parser { - match event { - Event::Start(Tag::Image(_, dest, _)) => { - found_asset.push(dest.to_string()); - } - Event::Html(html) => { - let content = html.into_string(); - - if let Ok(dom) = Dom::parse(&content) { - for item in dom.children { - if let Node::Element(ref element) = item { - found_asset.extend(find_assets_in_nested_html_tags(element)?.into_iter()); - } - } - } - } - _ => {} - } - } - - found_asset.sort(); - found_asset.dedup(); - if !found_asset.is_empty() { - trace!("Assets found in content : {:?}", found_asset); - } - Ok(found_asset) -} - -pub(crate) mod handler { - use std::{ - fs::{self, File, OpenOptions}, - io::{self, Read}, - path::Path, - }; - - #[cfg(test)] - use mockall::automock; - - use crate::Error; - - use super::{Asset, AssetKind}; - - 
#[cfg_attr(test, automock)] - pub(crate) trait ContentRetriever { - fn download(&self, asset: &Asset) -> Result<(), Error> { - if let AssetKind::Remote(url) = &asset.source { - let dest = &asset.location_on_disk; - if dest.is_file() { - debug!("Cache file {:?} to '{}' already exists.", dest, url); - } else { - if let Some(cache_dir) = dest.parent() { - fs::create_dir_all(cache_dir)?; - } - debug!("Downloading asset : {}", url); - let mut file = OpenOptions::new().create(true).write(true).open(dest)?; - let mut resp = self.retrieve(url.as_str())?; - io::copy(&mut resp, &mut file)?; - debug!("Downloaded asset by '{}'", url); - } - } - Ok(()) - } - fn read(&self, path: &Path, buffer: &mut Vec) -> Result<(), Error> { - File::open(path)?.read_to_end(buffer)?; - Ok(()) - } - fn retrieve(&self, url: &str) -> Result, Error>; - } - - pub(crate) struct ResourceHandler; - impl ContentRetriever for ResourceHandler { - fn retrieve(&self, url: &str) -> Result, Error> { - let res = ureq::get(url).call()?; - match res.status() { - 200 => Ok(res.into_reader()), - 404 => Err(Error::AssetFileNotFound(format!( - "Missing remote resource: {url}" - ))), - _ => unreachable!("Unexpected response status for '{url}'"), - } - } - } - - #[cfg(test)] - mod tests { - use super::ContentRetriever; - use crate::{resources::Asset, Error}; - use tempdir::TempDir; - - type BoxRead = Box<(dyn std::io::Read + Send + Sync + 'static)>; - - #[test] - fn download_success() { - use std::io::Read; - - struct TestHandler; - impl ContentRetriever for TestHandler { - fn retrieve(&self, _url: &str) -> Result { - Ok(Box::new("Downloaded content".as_bytes())) - } - } - let cr = TestHandler {}; - let a = temp_remote_asset("https://mdbook-epub.org/image.svg").unwrap(); - let r = cr.download(&a); - - assert!(r.is_ok()); - let mut buffer = String::new(); - let mut f = std::fs::File::open(&a.location_on_disk).unwrap(); - f.read_to_string(&mut buffer).unwrap(); - assert_eq!(buffer, "Downloaded content"); - } - - #[test] 
- fn download_fail_when_resource_not_exist() { - struct TestHandler; - impl ContentRetriever for TestHandler { - fn retrieve(&self, url: &str) -> Result { - Err(Error::AssetFileNotFound(format!( - "Missing remote resource: {url}" - ))) - } - } - let cr = TestHandler {}; - let a = temp_remote_asset("https://mdbook-epub.org/not-exist.svg").unwrap(); - let r = cr.download(&a); - - assert!(r.is_err()); - assert!(matches!(r.unwrap_err(), Error::AssetFileNotFound(_))); - } - - #[test] - #[should_panic(expected = "NOT 200 or 404")] - fn download_fail_with_unexpected_status() { - struct TestHandler; - impl ContentRetriever for TestHandler { - fn retrieve(&self, _url: &str) -> Result { - panic!("NOT 200 or 404") - } - } - let cr = TestHandler {}; - let a = temp_remote_asset("https://mdbook-epub.org/bad.svg").unwrap(); - let r = cr.download(&a); - - panic!("{}", r.unwrap_err().to_string()); - } - - fn temp_remote_asset(url: &str) -> Result { - let dest_dir = TempDir::new("mdbook-epub")?; - Asset::from_url(url::Url::parse(url).unwrap(), dest_dir.path()) - } - } -} - -#[cfg(test)] -mod tests { - use serde_json::{json, Value}; - - use super::*; - - #[test] - fn test_find_images() { - let parent_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/dummy/src"); - let upper_parent_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/dummy"); - let src = - "![Image 1](./rust-logo.png)\n[a link](to/nowhere) ![Image 2][2]\n\n[2]: reddit.svg\n\ - \n\n\"Rust\n\n\ - \n\n![Image 4](assets/rust-logo.png)\n[a link](to/nowhere) - ![Image 4](../third_party/wikimedia/Epub_logo_color.svg)\n[a link](to/nowhere)"; - let should_be = vec![ - upper_parent_dir - .join("third_party/wikimedia/Epub_logo_color.svg") - .canonicalize() - .unwrap(), - parent_dir.join("rust-logo.png").canonicalize().unwrap(), - parent_dir - .join("assets/rust-logo.png") - .canonicalize() - .unwrap(), - parent_dir.join("reddit.svg").canonicalize().unwrap(), - ]; - - let got = find_assets_in_markdown(src) - .unwrap() - 
.into_iter() - .map(|a| parent_dir.join(a).canonicalize().unwrap()) - .collect::>(); - - assert_eq!(got, should_be); - } - - #[test] - fn find_local_asset() { - let link = "./rust-logo.png"; - // link and link2 - are the same asset - let link2 = "assets/rust-logo.png"; - // not_found_link3 path won't be found because it's outside of src/ - let not_found_link3 = "../third_party/wikimedia/Epub_logo_color.svg"; - - let temp = tempdir::TempDir::new("mdbook-epub").unwrap(); - let dest_dir = temp.path().to_string_lossy().to_string(); - let chapters = json!([{ - "Chapter": { - "name": "Chapter 1", - "content": format!("# Chapter 1\r\n\r\n![Image]({link})\r\n![Image]({link2})\r\n![Image]({not_found_link3})"), - "number": [1], - "sub_items": [], - "path": "chapter_1.md", - "parent_names": []} - }]); - let ctx = ctx_with_chapters(&chapters, &dest_dir).unwrap(); - - let mut assets = find(&ctx).unwrap(); - assert!(assets.len() == 2); - - fn assert_asset(a: Asset, link: &str, ctx: &RenderContext) { - let link_as_path = utils::normalize_path(&PathBuf::from(link)); - let mut src_path = PathBuf::from(&ctx.config.book.src); - if link.starts_with(UPPER_PARENT) || link.starts_with(UPPER_PARENT_STARTS_SLASH) { - src_path.pop(); - } - - let filename = link_as_path.as_path().to_str().unwrap(); - let absolute_location = PathBuf::from(&ctx.root) - .join(&src_path) - .join(&link_as_path) - .canonicalize() - .expect("Asset Location is not found"); - - let source = AssetKind::Local(PathBuf::from(link)); - let should_be = Asset::new(filename, absolute_location, source); - assert_eq!(a, should_be); - } - assert_asset(assets.remove( - utils::normalize_path(&PathBuf::from(link)).to_str().unwrap() - ).unwrap(), link, &ctx); - assert_asset(assets.remove( - utils::normalize_path(&PathBuf::from(link2)).to_str().unwrap() - ).unwrap(), link2, &ctx); - } - - #[test] - fn find_remote_asset() { - let link = "https://www.rust-lang.org/static/images/rust-logo-blk.svg"; - let link2 = 
"https://www.rust-lang.org/static/images/rust-logo-blk.png"; - let link_parsed = Url::parse(link).unwrap(); - let link_parsed2 = Url::parse(link2).unwrap(); - let temp = tempdir::TempDir::new("mdbook-epub").unwrap(); - let dest_dir = temp.path().to_string_lossy().to_string(); - let chapters = json!([ - {"Chapter": { - "name": "Chapter 1", - "content": format!("# Chapter 1\r\n\r\n![Image]({link})\r\n"), - "number": [1], - "sub_items": [], - "path": "chapter_1.md", - "parent_names": []}}]); - let ctx = ctx_with_chapters(&chapters, &dest_dir).unwrap(); - - let mut assets = find(&ctx).unwrap(); - assert!(assets.len() == 2); - - for (key, value) in assets.clone().into_iter() { - trace!("{} / {:?}", key, &value); - match value.source { - AssetKind::Remote(internal_url) => { - let key_to_remove = value.location_on_disk.to_str().unwrap(); - let got = assets.remove(key_to_remove).unwrap(); - let filename; - if key_to_remove.contains(".svg") { - filename = PathBuf::from("").join(utils::hash_link(&link_parsed)); - } else { - filename = PathBuf::from("").join(utils::hash_link(&link_parsed2)); - } - let absolute_location = temp.path().join(&filename); - let source = AssetKind::Remote(internal_url); - let should_be = Asset::new(filename, absolute_location, source); - assert_eq!(got, should_be); - } - _ => { - // only remote urls are processed here for simplicity - panic!("Should not be here... 
only remote urls are used here") - } - } - } - } - - #[test] - fn find_draft_chapter_without_error() { - let temp = tempdir::TempDir::new("mdbook-epub").unwrap(); - let dest_dir = temp.into_path().to_string_lossy().to_string(); - let chapters = json!([ - {"Chapter": { - "name": "Chapter 1", - "content": "", - "number": [1], - "sub_items": [], - "path": null, - "parent_names": []}}]); - let ctx = ctx_with_chapters(&chapters, &dest_dir).unwrap(); - assert!(find(&ctx).unwrap().is_empty()); - } - - #[test] - #[should_panic(expected = "Asset was not found")] - fn find_asset_fail_when_chapter_dir_not_exist() { - panic!( - "{}", - Asset::from_local("a.png", Path::new("tests\\dummy\\src"), Path::new("ch\\a.md")) - .unwrap_err() - .to_string() - ); - } - - #[cfg(not(target_os = "windows"))] - #[test] - #[should_panic(expected = "Asset was not found")] - fn find_asset_fail_when_chapter_dir_not_exist_linux() { - panic!( - "{}", - Asset::from_local("a.png", Path::new("tests/dummy/src"), Path::new("ch/a.md")) - .unwrap_err() - .to_string() - ); - } - - #[cfg(not(target_os = "windows"))] - #[test] - #[should_panic( - expected = "Asset was not found: 'wikimedia' by 'tests/dummy/third_party/a.md/wikimedia', error = No such file or directory (os error 2)" - )] - fn find_asset_fail_when_it_is_a_dir() { - panic!( - "{}", - Asset::from_local( - "wikimedia", - Path::new("tests/dummy"), - Path::new("third_party/a.md") - ) - .unwrap_err() - .to_string() - ); - } - - #[cfg(target_os = "windows")] - #[test] - #[should_panic( - //expected = "Asset was not found: 'wikimedia' by 'tests\\dummy\\third_party\\a.md\\wikimedia', error = Системе не удается найти указанный путь. (os error 3)" - expected = "Asset was not found: 'wikimedia' by 'tests\\dummy\\third_party\\a.md\\wikimedia', error = The system cannot find the path specified. 
(os error 3)" - )] - fn find_asset_fail_when_it_is_a_dir_windows() { - panic!( - "{}", - Asset::from_local( - "wikimedia", - Path::new("tests\\dummy"), - Path::new("third_party\\a.md") - ) - .unwrap_err() - .to_string() - ); - } - - fn ctx_with_chapters( - chapters: &Value, - destination: &str, - ) -> Result { - let json_ctx = json!({ - "version": mdbook::MDBOOK_VERSION, - "root": "tests/dummy", - "book": {"sections": chapters, "__non_exhaustive": null}, - "config": { - "book": {"authors": [], "language": "en", "multilingual": false, - "src": "src", "title": "DummyBook"}, - "output": {"epub": {"curly-quotes": true}}}, - "destination": destination - }); - RenderContext::from_json(json_ctx.to_string().as_bytes()) - } - - #[test] - fn test_compute_asset_path_by_src_and_link_to_full_path() { - let book_source_root_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/dummy/src"); - let mut book_chapter_dir = PathBuf::from(book_source_root_dir); - book_chapter_dir.push(Path::new("chapter_1.md")); - - let link = "./asset1.jpg"; - let asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_chapter_dir); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - let full_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - full_path.as_path(), - Path::new(env!("CARGO_MANIFEST_DIR")) - .join("tests/dummy/src") - .join("asset1.jpg") - ); - } - - #[test] - fn test_remove_prefixes() { - let link_string = String::from("assets/verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); - assert_eq!("assets/verify.jpeg", link_string); - - let link_string = String::from("/assets/verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); - assert_eq!("assets/verify.jpeg", link_string); - - let link_string = String::from("../../assets/verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); - 
assert_eq!("../assets/verify.jpeg", link_string); - let new_link = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); - assert_eq!("assets/verify.jpeg", new_link); - - let upper_folder_path = &[UPPER_PARENT_LINUX, UPPER_PARENT, MAIN_SEPARATOR_STR, &"/"]; - let link_string = String::from("assets/verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, upper_folder_path); - assert_eq!("assets/verify.jpeg", link_string); - - let link_string = String::from("/assets/verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, upper_folder_path); - assert_eq!("assets/verify.jpeg", link_string); - - let link_string = String::from("../../assets/verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, upper_folder_path); - assert_eq!("../assets/verify.jpeg", link_string); - let new_link = Asset::remove_prefixes(link_string, upper_folder_path); - assert_eq!("assets/verify.jpeg", new_link); - } - - #[cfg(target_os = "windows")] - #[test] - fn test_remove_prefixes_windows() { - let link_string = String::from("assets\\verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); - assert_eq!("assets\\verify.jpeg", link_string); - - let link_string = String::from("\\assets\\verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); - assert_eq!("assets\\verify.jpeg", link_string); - - let link_string = String::from("..\\..\\assets\\verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); - assert_eq!("..\\assets\\verify.jpeg", link_string); - let new_link = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); - assert_eq!("assets\\verify.jpeg", new_link); - - let upper_folder_path = &[UPPER_PARENT_LINUX, UPPER_PARENT, MAIN_SEPARATOR_STR, &"/"]; - let link_string = String::from("assets\\verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, upper_folder_path); - assert_eq!("assets\\verify.jpeg", link_string); - - let 
link_string = String::from("/assets\\verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, upper_folder_path); - assert_eq!("assets\\verify.jpeg", link_string); - - let link_string = String::from("..\\..\\assets\\verify.jpeg"); - let link_string = Asset::remove_prefixes(link_string, upper_folder_path); - assert_eq!("..\\assets\\verify.jpeg", link_string); - let new_link = Asset::remove_prefixes(link_string, upper_folder_path); - assert_eq!("assets\\verify.jpeg", new_link); - } - - #[test] - fn test_compute_asset_path_by_src_and_link() { - let mut book_or_chapter_src = ["media", "book", "src"].iter().collect::(); - - let mut link = "./asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path().as_os_str(), - (["media", "book", "src", "asset1.jpg"]).iter().collect::().as_os_str() - ); - - link = "asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "src", "asset1.jpg"].iter().collect::() - ); - - link = "../upper/assets/asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "upper", "assets", "asset1.jpg"].iter().collect::() - ); - - link = "assets/asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = 
utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "src", "assets", "asset1.jpg"].iter().collect::() - ); - - link = "./assets/asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "src", "assets", "asset1.jpg"].iter().collect::() - ); - - book_or_chapter_src = ["media", "book", "src", "chapter1"].iter().collect::(); - - link = "../assets/asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "src", "assets", "asset1.jpg"].iter().collect::() - ); - - book_or_chapter_src = ["media", "book", "src", "chapter1", "inner"].iter().collect::(); - link = "../../assets/asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "src", "assets", "asset1.jpg"].iter().collect::() - ); - } - - #[cfg(target_os = "windows")] - #[test] - fn test_compute_asset_path_by_src_and_link_windows() { - let mut book_or_chapter_src = ["media", "book", "src"].iter().collect::(); - - let mut link = ".\\asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = 
asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path().as_os_str(), - (["media", "book", "src", "asset1.jpg"]).iter().collect::().as_os_str() - ); - - link = "asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "src", "asset1.jpg"].iter().collect::() - ); - - link = "..\\upper\\assets\\asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "upper", "assets", "asset1.jpg"].iter().collect::() - ); - - link = "assets\\asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "src", "assets", "asset1.jpg"].iter().collect::() - ); - - link = ".\\assets\\asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "src", "assets", "asset1.jpg"].iter().collect::() - ); - - book_or_chapter_src = ["media", "book", "src", "chapter1"].iter().collect::(); - - link = "..\\assets\\asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = 
utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "src", "assets", "asset1.jpg"].iter().collect::() - ); - - book_or_chapter_src = ["media", "book", "src", "chapter1", "inner"].iter().collect::(); - link = "..\\..\\assets\\asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!( - asset_path.as_path(), - ["media", "book", "src", "assets", "asset1.jpg"].iter().collect::() - ); - } - - #[cfg(not(target_os = "windows"))] - #[test] - fn incorrect_compute_asset_path_by_src_and_link() { - let book_or_chapter_src = ["media", "book", "src"].iter().collect::(); - - let link = "/assets/asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!(asset_path.as_path(), Path::new("/assets/asset1.jpg")); - - let link = "/../assets/asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!(asset_path.as_path(), Path::new("/assets/asset1.jpg")); - } - - #[cfg(target_os = "windows")] - #[test] - fn incorrect_compute_asset_path_by_src_and_link_windows() { - let book_or_chapter_src = ["media", "book", "src"].iter().collect::(); - - let link = "\\assets\\asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = 
utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!(asset_path.as_path(), Path::new("/assets/asset1.jpg")); - - let link = "\\..\\assets/asset1.jpg"; - let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); - let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); - asset_path = asset_path.join(normalized_link); // compose final result - assert_eq!(asset_path.as_path(), Path::new("/assets/asset1.jpg")); - } -} +pub(crate) mod resources; +pub(crate) mod retrieve; +pub(crate) mod asset; \ No newline at end of file diff --git a/src/resources/asset.rs b/src/resources/asset.rs new file mode 100644 index 000000000..9721a1a35 --- /dev/null +++ b/src/resources/asset.rs @@ -0,0 +1,152 @@ +use std::path::{MAIN_SEPARATOR_STR, Path, PathBuf}; +use url::Url; +use mime_guess::Mime; +use crate::errors::Error; +use crate::resources::resources::{UPPER_FOLDER_PATHS, UPPER_PARENT, UPPER_PARENT_LINUX, UPPER_PARENT_STARTS_SLASH, UPPER_PARENT_STARTS_SLASH_LINUX}; +use crate::utils; + +/// The type of asset, remote or local +#[derive(Clone, PartialEq, Debug)] +pub(crate) enum AssetKind { + Remote(Url), + Local(PathBuf), +} + +#[derive(Clone, PartialEq, Debug)] +pub(crate) struct Asset { + /// The asset's absolute location on disk. + pub(crate) location_on_disk: PathBuf, + /// The local asset's filename relative to the `src/` or `src/assets` directory. + /// If it's a remote asset it's relative to the destination where the book generated. + pub(crate) filename: PathBuf, + pub(crate) mimetype: Mime, + /// The asset's original link as a enum [local][AssetKind::Local] or [remote][AssetKind::Remote]. 
+ pub(crate) source: AssetKind, +} + +impl Asset { + pub(crate) fn new(filename: P, absolute_location: Q, source: K) -> Self + where + P: Into, + Q: Into, + K: Into, + { + let location_on_disk = absolute_location.into(); + let mt = mime_guess::from_path(&location_on_disk).first_or_octet_stream(); + let source = source.into(); + Self { + location_on_disk, + filename: filename.into(), + mimetype: mt, + source, + } + } + + // Create Asset by using remote Url, destination path is used for composing path + pub(crate) fn from_url(url: Url, dest_dir: &Path) -> Result { + trace!("Extract from URL: {:#?} into folder = {:?}", url, dest_dir); + let filename = utils::hash_link(&url); + let dest_dir = utils::normalize_path(dest_dir); + let full_filename = dest_dir.join(filename); + // Will fetch assets to normalized path later. fs::canonicalize() only works for existed path. + let absolute_location = utils::normalize_path(full_filename.as_path()); + let filename = absolute_location.strip_prefix(dest_dir).unwrap(); + let asset = Asset::new(filename, &absolute_location, AssetKind::Remote(url)); + debug!("Created from URL: {:#?}", asset); + Ok(asset) + } + + // Create Asset by using local link, source and Chapter path are used for composing fields + pub(crate) fn from_local(link: &str, src_dir: &Path, chapter_path: &Path) -> Result { + debug!( + "Composing asset path for {:?} + {:?} in chapter = {:?}", + src_dir, link, chapter_path + ); + let chapter_path = src_dir.join(chapter_path); + + // compose file name by it's link and chapter path + let stripped_path = Self::compute_asset_path_by_src_and_link(link, &chapter_path); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + debug!("Composing full_filename by '{:?}' + '{:?}'", &stripped_path, &normalized_link.clone()); + let full_filename = stripped_path.join(normalized_link); // compose final result + + debug!("Joined full_filename = {:?}", &full_filename.display()); + let absolute_location = 
full_filename.canonicalize().map_err(|this_error| { + Error::AssetFileNotFound(format!( + "Asset was not found: '{link}' by '{}', error = {}", + &full_filename.display(), + this_error + )) + })?; + if !absolute_location.is_file() || absolute_location.is_symlink() { + return Err(Error::AssetFile(absolute_location)); + } + // Use filename as embedded file path with content from absolute_location. + let binding = utils::normalize_path(Path::new(link)); + debug!("Extracting file name from = {:?}, binding = '{binding:?}'", &full_filename.display()); + let filename = if cfg!(target_os = "windows") { + binding.as_os_str().to_os_string() + .into_string().expect("Error getting filename for Local Asset").replace('\\', "/") + } else { + String::from(binding.as_path().to_str().unwrap()) + }; + + let asset = Asset::new( + filename, + &absolute_location, + AssetKind::Local(PathBuf::from(link)), + ); + trace!( + "[{:#?}] = {:?} : {:?}", + asset.source, + asset.filename, + asset.location_on_disk + ); + debug!("Created from local: {:#?}", asset); + Ok(asset) + } + + // Analyses input 'link' and stripes chapter's path to shorter link + // can pop one folder above the book's src or above an internal sub folder + // 'link' is stripped too for one upper folder on one call + pub(crate) fn compute_asset_path_by_src_and_link(link: &str, chapter_dir: &PathBuf) -> PathBuf { + let mut reassigned_asset_root: PathBuf = PathBuf::from(chapter_dir); + let link_string = String::from(link); + // if chapter is a MD file, remove if from path + if chapter_dir.is_file() { + reassigned_asset_root.pop(); + } + trace!("check if parent present by '{}' = '{}' || '{}' || '{}'", + link_string, MAIN_SEPARATOR_STR, UPPER_PARENT, UPPER_PARENT_STARTS_SLASH); + // if link points to upper folder + if !link_string.is_empty() + && (link_string.starts_with(MAIN_SEPARATOR_STR) + || link_string.starts_with(UPPER_PARENT_LINUX) + || link_string.starts_with(UPPER_PARENT) + || 
link_string.starts_with(UPPER_PARENT_STARTS_SLASH) + || link_string.starts_with(UPPER_PARENT_STARTS_SLASH_LINUX)) + { + reassigned_asset_root.pop(); // remove an one folder from asset's path + // make a recursive call + let new_link = Self::remove_prefixes(link_string, UPPER_FOLDER_PATHS); + reassigned_asset_root = Self::compute_asset_path_by_src_and_link(&new_link, &reassigned_asset_root); + } + reassigned_asset_root // compose final result + } + + // Strip input link by prefixes from &str array + // return 'shorter' result or the same + pub(crate) fn remove_prefixes(link_to_strip: String, prefixes: &[&str]) -> String { + let mut stripped_link = link_to_strip.clone(); + for prefix in prefixes { + match link_to_strip.strip_prefix(prefix) { + Some(s) => { + stripped_link = s.to_string(); + return stripped_link + }, + None => &link_to_strip + }; + }; + stripped_link + } +} diff --git a/src/resources/resources.rs b/src/resources/resources.rs new file mode 100644 index 000000000..cf379d202 --- /dev/null +++ b/src/resources/resources.rs @@ -0,0 +1,712 @@ +use std::collections::HashMap; +use std::path::MAIN_SEPARATOR_STR; + +use const_format::concatcp; +use html_parser::{Dom, Element, Node}; +use mdbook::book::BookItem; +use mdbook::renderer::RenderContext; +use pulldown_cmark::{Event, Tag}; +use url::Url; + +use crate::resources::asset::{Asset, AssetKind}; +use crate::{utils, Error}; + +// Internal constants for reveling 'upper folder' paths in resource links inside MD +pub(crate) const UPPER_PARENT: &str = concatcp!("..", MAIN_SEPARATOR_STR); +pub(crate) const UPPER_PARENT_LINUX: &str = concatcp!("..", "/"); +pub(crate) const UPPER_PARENT_STARTS_SLASH: &str = + concatcp!(MAIN_SEPARATOR_STR, "..", MAIN_SEPARATOR_STR); +pub(crate) const UPPER_PARENT_STARTS_SLASH_LINUX: &str = concatcp!("/", "..", "/"); + +#[cfg(not(target_os = "windows"))] +pub(crate) const UPPER_FOLDER_PATHS: &[&str] = + &[MAIN_SEPARATOR_STR, UPPER_PARENT, UPPER_PARENT_LINUX]; + +#[cfg(target_os = 
"windows")] +pub(crate) const UPPER_FOLDER_PATHS: &[&str] = + &[&"/", MAIN_SEPARATOR_STR, UPPER_PARENT, UPPER_PARENT_LINUX]; + +/// Find all resources in book and put them into HashMap. +/// The key is a link, value is a composed Asset +pub(crate) fn find(ctx: &RenderContext) -> Result, Error> { + let mut assets: HashMap = HashMap::new(); + debug!("Finding resources by:\n{:?}", ctx.config); + let src_dir = ctx.root.join(&ctx.config.book.src).canonicalize()?; + + debug!( + "Start iteration over a [{:?}] sections in src_dir = {:?}", + ctx.book.sections.len(), + src_dir + ); + for section in ctx.book.iter() { + match *section { + BookItem::Chapter(ref ch) => { + let mut assets_count = 0; + debug!("Searching links and assets for: '{}'", ch); + if ch.path.is_none() { + debug!("'{}' is a draft chapter and should be no content.", ch.name); + continue; + } + for link in find_assets_in_markdown(&ch.content)? { + let asset = match Url::parse(&link) { + Ok(url) => Asset::from_url(url, &ctx.destination), + Err(_) => Asset::from_local(&link, &src_dir, ch.path.as_ref().unwrap()), + }?; + + // that is CORRECT generation way + debug!( + "Check relative path assets for: '{}' for {:?}", + ch.name, asset + ); + match asset.source { + // local asset kind + AssetKind::Local(_) => { + let relative = asset.location_on_disk.strip_prefix(&src_dir); + match relative { + Ok(relative_link_path) => { + let link_key: String = + String::from(relative_link_path.to_str().unwrap()); + if let std::collections::hash_map::Entry::Vacant(e) = + assets.entry(link_key.to_owned()) + { + debug!( + "Adding asset by link '{:?}' : {:#?}", + link_key, &asset + ); + e.insert(asset); + assets_count += 1; + } else { + debug!("Skipped asset for '{}'", link_key); + } + } + _ => { + // skip incorrect resource/image link outside of book /SRC/ folder + warn!("Sorry, we can't add 'Local asset' that is outside of book's /src/ folder, {:?}", &asset); + } + } + } + AssetKind::Remote(_) => { + // remote asset kind + let 
link_key: String = + String::from(asset.location_on_disk.to_str().unwrap()); + debug!( + "Adding Remote asset by link '{:?}' : {:#?}", + link_key, &asset + ); + assets.insert(link_key, asset); + assets_count += 1; + } + }; + } + debug!( + "Found '{}' links and assets inside '{}'", + assets_count, ch.name + ); + } + BookItem::Separator => trace!("Skip separator."), + BookItem::PartTitle(ref title) => trace!("Skip part title: {}.", title), + } + } + debug!("Added '{}' links and assets in total", assets.len()); + Ok(assets) +} + +// Look up resources in nested HTML element +fn find_assets_in_nested_html_tags(element: &Element) -> Result, Error> { + let mut found_asset = Vec::new(); + + if element.name == "img" { + if let Some(dest) = &element.attributes["src"] { + found_asset.push(dest.clone()); + } + } + for item in &element.children { + if let Node::Element(ref nested_element) = item { + found_asset.extend(find_assets_in_nested_html_tags(nested_element)?.into_iter()); + } + } + + Ok(found_asset) +} + +// Look up resources in chapter md content +fn find_assets_in_markdown(chapter_src_content: &str) -> Result, Error> { + let mut found_asset = Vec::new(); + + let pull_down_parser = utils::create_new_pull_down_parser(chapter_src_content); + // that will process chapter content and find assets + for event in pull_down_parser { + match event { + Event::Start(Tag::Image(_, dest, _)) => { + found_asset.push(dest.to_string()); + } + Event::Html(html) => { + let content = html.into_string(); + + if let Ok(dom) = Dom::parse(&content) { + for item in dom.children { + if let Node::Element(ref element) = item { + found_asset + .extend(find_assets_in_nested_html_tags(element)?.into_iter()); + } + } + } + } + _ => {} + } + } + + found_asset.sort(); + found_asset.dedup(); + if !found_asset.is_empty() { + trace!("Assets found in content : {:?}", found_asset); + } + Ok(found_asset) +} + +#[cfg(test)] +mod tests { + use std::path::{Path, PathBuf}; + use serde_json::{json, Value}; + use 
super::*; + + #[test] + fn test_find_images() { + let parent_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/dummy/src"); + let upper_parent_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/dummy"); + let src = + "![Image 1](./rust-logo.png)\n[a link](to/nowhere) ![Image 2][2]\n\n[2]: reddit.svg\n\ + \n\n\"Rust\n\n\ + \n\n![Image 4](assets/rust-logo.png)\n[a link](to/nowhere) + ![Image 4](../third_party/wikimedia/Epub_logo_color.svg)\n[a link](to/nowhere)"; + let should_be = vec![ + upper_parent_dir + .join("third_party/wikimedia/Epub_logo_color.svg") + .canonicalize() + .unwrap(), + parent_dir.join("rust-logo.png").canonicalize().unwrap(), + parent_dir + .join("assets/rust-logo.png") + .canonicalize() + .unwrap(), + parent_dir.join("reddit.svg").canonicalize().unwrap(), + ]; + + let got = find_assets_in_markdown(src) + .unwrap() + .into_iter() + .map(|a| parent_dir.join(a).canonicalize().unwrap()) + .collect::>(); + + assert_eq!(got, should_be); + } + + #[test] + fn find_local_asset() { + let link = "./rust-logo.png"; + // link and link2 - are the same asset + let link2 = "assets/rust-logo.png"; + // not_found_link3 path won't be found because it's outside of src/ + let not_found_link3 = "../third_party/wikimedia/Epub_logo_color.svg"; + + let temp = tempdir::TempDir::new("mdbook-epub").unwrap(); + let dest_dir = temp.path().to_string_lossy().to_string(); + let chapters = json!([{ + "Chapter": { + "name": "Chapter 1", + "content": format!("# Chapter 1\r\n\r\n![Image]({link})\r\n![Image]({link2})\r\n![Image]({not_found_link3})"), + "number": [1], + "sub_items": [], + "path": "chapter_1.md", + "parent_names": []} + }]); + let ctx = ctx_with_chapters(&chapters, &dest_dir).unwrap(); + + let mut assets = find(&ctx).unwrap(); + assert!(assets.len() == 2); + + fn assert_asset(a: Asset, link: &str, ctx: &RenderContext) { + let link_as_path = utils::normalize_path(&PathBuf::from(link)); + let mut src_path = PathBuf::from(&ctx.config.book.src); + if 
link.starts_with(UPPER_PARENT) || link.starts_with(UPPER_PARENT_STARTS_SLASH) { + src_path.pop(); + } + + let filename = link_as_path.as_path().to_str().unwrap(); + let absolute_location = PathBuf::from(&ctx.root) + .join(&src_path) + .join(&link_as_path) + .canonicalize() + .expect("Asset Location is not found"); + + let source = AssetKind::Local(PathBuf::from(link)); + let should_be = Asset::new(filename, absolute_location, source); + assert_eq!(a, should_be); + } + assert_asset( + assets + .remove( + utils::normalize_path(&PathBuf::from(link)) + .to_str() + .unwrap(), + ) + .unwrap(), + link, + &ctx, + ); + assert_asset( + assets + .remove( + utils::normalize_path(&PathBuf::from(link2)) + .to_str() + .unwrap(), + ) + .unwrap(), + link2, + &ctx, + ); + } + + #[test] + fn find_remote_asset() { + let link = "https://www.rust-lang.org/static/images/rust-logo-blk.svg"; + let link2 = "https://www.rust-lang.org/static/images/rust-logo-blk.png"; + let link_parsed = Url::parse(link).unwrap(); + let link_parsed2 = Url::parse(link2).unwrap(); + let temp = tempdir::TempDir::new("mdbook-epub").unwrap(); + let dest_dir = temp.path().to_string_lossy().to_string(); + let chapters = json!([ + {"Chapter": { + "name": "Chapter 1", + "content": format!("# Chapter 1\r\n\r\n![Image]({link})\r\n"), + "number": [1], + "sub_items": [], + "path": "chapter_1.md", + "parent_names": []}}]); + let ctx = ctx_with_chapters(&chapters, &dest_dir).unwrap(); + + let mut assets = find(&ctx).unwrap(); + assert!(assets.len() == 2); + + for (key, value) in assets.clone().into_iter() { + trace!("{} / {:?}", key, &value); + match value.source { + AssetKind::Remote(internal_url) => { + let key_to_remove = value.location_on_disk.to_str().unwrap(); + let got = assets.remove(key_to_remove).unwrap(); + let filename; + if key_to_remove.contains(".svg") { + filename = PathBuf::from("").join(utils::hash_link(&link_parsed)); + } else { + filename = PathBuf::from("").join(utils::hash_link(&link_parsed2)); + } + 
let absolute_location = temp.path().join(&filename); + let source = AssetKind::Remote(internal_url); + let should_be = Asset::new(filename, absolute_location, source); + assert_eq!(got, should_be); + } + _ => { + // only remote urls are processed here for simplicity + panic!("Should not be here... only remote urls are used here") + } + } + } + } + + #[test] + fn find_draft_chapter_without_error() { + let temp = tempdir::TempDir::new("mdbook-epub").unwrap(); + let dest_dir = temp.into_path().to_string_lossy().to_string(); + let chapters = json!([ + {"Chapter": { + "name": "Chapter 1", + "content": "", + "number": [1], + "sub_items": [], + "path": null, + "parent_names": []}}]); + let ctx = ctx_with_chapters(&chapters, &dest_dir).unwrap(); + assert!(find(&ctx).unwrap().is_empty()); + } + + #[test] + #[should_panic(expected = "Asset was not found")] + fn find_asset_fail_when_chapter_dir_not_exist() { + panic!( + "{}", + Asset::from_local( + "a.png", + Path::new("tests\\dummy\\src"), + Path::new("ch\\a.md") + ) + .unwrap_err() + .to_string() + ); + } + + #[cfg(not(target_os = "windows"))] + #[test] + #[should_panic(expected = "Asset was not found")] + fn find_asset_fail_when_chapter_dir_not_exist_linux() { + panic!( + "{}", + Asset::from_local("a.png", Path::new("tests/dummy/src"), Path::new("ch/a.md")) + .unwrap_err() + .to_string() + ); + } + + #[cfg(not(target_os = "windows"))] + #[test] + #[should_panic( + expected = "Asset was not found: 'wikimedia' by 'tests/dummy/third_party/a.md/wikimedia', error = No such file or directory (os error 2)" + )] + fn find_asset_fail_when_it_is_a_dir() { + panic!( + "{}", + Asset::from_local( + "wikimedia", + Path::new("tests/dummy"), + Path::new("third_party/a.md") + ) + .unwrap_err() + .to_string() + ); + } + + #[cfg(target_os = "windows")] + #[test] + #[should_panic( + //expected = "Asset was not found: 'wikimedia' by 'tests\\dummy\\third_party\\a.md\\wikimedia', error = Системе не удается найти указанный путь. 
(os error 3)" + expected = "Asset was not found: 'wikimedia' by 'tests\\dummy\\third_party\\a.md\\wikimedia', error = The system cannot find the path specified. (os error 3)" + )] + fn find_asset_fail_when_it_is_a_dir_windows() { + panic!( + "{}", + Asset::from_local( + "wikimedia", + Path::new("tests\\dummy"), + Path::new("third_party\\a.md") + ) + .unwrap_err() + .to_string() + ); + } + + fn ctx_with_chapters( + chapters: &Value, + destination: &str, + ) -> Result { + let json_ctx = json!({ + "version": mdbook::MDBOOK_VERSION, + "root": "tests/dummy", + "book": {"sections": chapters, "__non_exhaustive": null}, + "config": { + "book": {"authors": [], "language": "en", "multilingual": false, + "src": "src", "title": "DummyBook"}, + "output": {"epub": {"curly-quotes": true}}}, + "destination": destination + }); + RenderContext::from_json(json_ctx.to_string().as_bytes()) + } + + #[test] + fn test_compute_asset_path_by_src_and_link_to_full_path() { + let book_source_root_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/dummy/src"); + let mut book_chapter_dir = PathBuf::from(book_source_root_dir); + book_chapter_dir.push(Path::new("chapter_1.md")); + + let link = "./asset1.jpg"; + let asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_chapter_dir); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + let full_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + full_path.as_path(), + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests/dummy/src") + .join("asset1.jpg") + ); + } + + #[test] + fn test_remove_prefixes() { + let link_string = String::from("assets/verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); + assert_eq!("assets/verify.jpeg", link_string); + + let link_string = String::from("/assets/verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); + assert_eq!("assets/verify.jpeg", link_string); + + let 
link_string = String::from("../../assets/verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); + assert_eq!("../assets/verify.jpeg", link_string); + let new_link = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); + assert_eq!("assets/verify.jpeg", new_link); + + let upper_folder_path = &[UPPER_PARENT_LINUX, UPPER_PARENT, MAIN_SEPARATOR_STR, &"/"]; + let link_string = String::from("assets/verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, upper_folder_path); + assert_eq!("assets/verify.jpeg", link_string); + + let link_string = String::from("/assets/verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, upper_folder_path); + assert_eq!("assets/verify.jpeg", link_string); + + let link_string = String::from("../../assets/verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, upper_folder_path); + assert_eq!("../assets/verify.jpeg", link_string); + let new_link = Asset::remove_prefixes(link_string, upper_folder_path); + assert_eq!("assets/verify.jpeg", new_link); + } + + #[cfg(target_os = "windows")] + #[test] + fn test_remove_prefixes_windows() { + let link_string = String::from("assets\\verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); + assert_eq!("assets\\verify.jpeg", link_string); + + let link_string = String::from("\\assets\\verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); + assert_eq!("assets\\verify.jpeg", link_string); + + let link_string = String::from("..\\..\\assets\\verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); + assert_eq!("..\\assets\\verify.jpeg", link_string); + let new_link = Asset::remove_prefixes(link_string, UPPER_FOLDER_PATHS); + assert_eq!("assets\\verify.jpeg", new_link); + + let upper_folder_path = &[UPPER_PARENT_LINUX, UPPER_PARENT, MAIN_SEPARATOR_STR, &"/"]; + let link_string = String::from("assets\\verify.jpeg"); + 
let link_string = Asset::remove_prefixes(link_string, upper_folder_path); + assert_eq!("assets\\verify.jpeg", link_string); + + let link_string = String::from("/assets\\verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, upper_folder_path); + assert_eq!("assets\\verify.jpeg", link_string); + + let link_string = String::from("..\\..\\assets\\verify.jpeg"); + let link_string = Asset::remove_prefixes(link_string, upper_folder_path); + assert_eq!("..\\assets\\verify.jpeg", link_string); + let new_link = Asset::remove_prefixes(link_string, upper_folder_path); + assert_eq!("assets\\verify.jpeg", new_link); + } + + #[test] + fn test_compute_asset_path_by_src_and_link() { + let mut book_or_chapter_src = ["media", "book", "src"].iter().collect::(); + + let mut link = "./asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path().as_os_str(), + (["media", "book", "src", "asset1.jpg"]) + .iter() + .collect::() + .as_os_str() + ); + + link = "asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "src", "asset1.jpg"] + .iter() + .collect::() + ); + + link = "../upper/assets/asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "upper", "assets", "asset1.jpg"] + .iter() + .collect::() + ); + + link = 
"assets/asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "src", "assets", "asset1.jpg"] + .iter() + .collect::() + ); + + link = "./assets/asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "src", "assets", "asset1.jpg"] + .iter() + .collect::() + ); + + book_or_chapter_src = ["media", "book", "src", "chapter1"] + .iter() + .collect::(); + + link = "../assets/asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "src", "assets", "asset1.jpg"] + .iter() + .collect::() + ); + + book_or_chapter_src = ["media", "book", "src", "chapter1", "inner"] + .iter() + .collect::(); + link = "../../assets/asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "src", "assets", "asset1.jpg"] + .iter() + .collect::() + ); + } + + #[cfg(target_os = "windows")] + #[test] + fn test_compute_asset_path_by_src_and_link_windows() { + let mut book_or_chapter_src = ["media", "book", "src"].iter().collect::(); + + let mut link = ".\\asset1.jpg"; + let mut 
asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path().as_os_str(), + (["media", "book", "src", "asset1.jpg"]) + .iter() + .collect::() + .as_os_str() + ); + + link = "asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "src", "asset1.jpg"] + .iter() + .collect::() + ); + + link = "..\\upper\\assets\\asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "upper", "assets", "asset1.jpg"] + .iter() + .collect::() + ); + + link = "assets\\asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "src", "assets", "asset1.jpg"] + .iter() + .collect::() + ); + + link = ".\\assets\\asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "src", "assets", "asset1.jpg"] + .iter() + .collect::() + ); + + book_or_chapter_src = ["media", 
"book", "src", "chapter1"] + .iter() + .collect::(); + + link = "..\\assets\\asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "src", "assets", "asset1.jpg"] + .iter() + .collect::() + ); + + book_or_chapter_src = ["media", "book", "src", "chapter1", "inner"] + .iter() + .collect::(); + link = "..\\..\\assets\\asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!( + asset_path.as_path(), + ["media", "book", "src", "assets", "asset1.jpg"] + .iter() + .collect::() + ); + } + + #[cfg(not(target_os = "windows"))] + #[test] + fn incorrect_compute_asset_path_by_src_and_link() { + let book_or_chapter_src = ["media", "book", "src"].iter().collect::(); + + let link = "/assets/asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!(asset_path.as_path(), Path::new("/assets/asset1.jpg")); + + let link = "/../assets/asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!(asset_path.as_path(), Path::new("/assets/asset1.jpg")); + } + + #[cfg(target_os = "windows")] + #[test] + fn incorrect_compute_asset_path_by_src_and_link_windows() { + let book_or_chapter_src = ["media", "book", 
"src"].iter().collect::<PathBuf>(); + + let link = "\\assets\\asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!(asset_path.as_path(), Path::new("/assets/asset1.jpg")); + + let link = "\\..\\assets/asset1.jpg"; + let mut asset_path = Asset::compute_asset_path_by_src_and_link(link, &book_or_chapter_src); + let normalized_link = utils::normalize_path(PathBuf::from(link).as_path()); + asset_path = asset_path.join(normalized_link); // compose final result + assert_eq!(asset_path.as_path(), Path::new("/assets/asset1.jpg")); + } +} diff --git a/src/resources/retrieve.rs b/src/resources/retrieve.rs new file mode 100644 index 000000000..311896b74 --- /dev/null +++ b/src/resources/retrieve.rs @@ -0,0 +1,130 @@ +use std::{ + fs::{self, File, OpenOptions}, + io::{self, Read}, + path::Path, +}; + +#[cfg(test)] +use mockall::automock; + +use crate::Error; +use crate::resources::asset::{Asset, AssetKind}; + +/// Source of asset content: downloads remote assets and reads files from disk. +#[cfg_attr(test, automock)] +pub(crate) trait ContentRetriever { + /// Stores a remote asset at `asset.location_on_disk` unless that file already exists. + fn download(&self, asset: &Asset) -> Result<(), Error> { + if let AssetKind::Remote(url) = &asset.source { + let dest = &asset.location_on_disk; + if dest.is_file() { + debug!("Cache file {:?} to '{}' already exists.", dest, url); + } else { + if let Some(cache_dir) = dest.parent() { + fs::create_dir_all(cache_dir)?; + } + debug!("Downloading asset : {}", url); + // Fetch before creating the file: a failed request must not leave an empty + // file behind, because `is_file()` would accept it as cached on the next run. + let mut resp = self.retrieve(url.as_str())?; + let mut file = OpenOptions::new().create(true).write(true).open(dest)?; + io::copy(&mut resp, &mut file)?; + debug!("Downloaded asset by '{}'", url); + } + } + Ok(()) + } + /// Appends the full content of the file at `path` to `buffer`. + fn read(&self, path: &Path, buffer: &mut Vec<u8>) -> Result<(), Error> { + File::open(path)?.read_to_end(buffer)?; + Ok(()) + } + /// Opens a reader over the content available at `url`. + fn retrieve(&self, url: &str) -> Result<Box<dyn Read + Send + Sync + 'static>, Error>; +} + +pub(crate) struct ResourceHandler; +impl
ContentRetriever for ResourceHandler { + fn retrieve(&self, url: &str) -> Result<Box<dyn Read + Send + Sync + 'static>, Error> { + // ureq reports 4xx/5xx as Err(Error::Status), so an Ok response is never an error status. + match ureq::get(url).call() { + Ok(res) => Ok(res.into_reader()), + Err(ureq::Error::Status(404, _)) => Err(Error::AssetFileNotFound(format!( + "Missing remote resource: {url}" + ))), + Err(e) => Err(e.into()), + } + } +} + +#[cfg(test)] +mod tests { + use tempdir::TempDir; + + use crate::errors::Error; + use crate::resources::asset::Asset; + + use super::ContentRetriever; + + type BoxRead = Box<(dyn std::io::Read + Send + Sync + 'static)>; + + #[test] + fn download_success() { + use std::io::Read; + + struct TestHandler; + impl ContentRetriever for TestHandler { + fn retrieve(&self, _url: &str) -> Result<BoxRead, Error> { + Ok(Box::new("Downloaded content".as_bytes())) + } + } + let cr = TestHandler {}; + let a = temp_remote_asset("https://mdbook-epub.org/image.svg").unwrap(); + let r = cr.download(&a); + + assert!(r.is_ok()); + let mut buffer = String::new(); + let mut f = std::fs::File::open(&a.location_on_disk).unwrap(); + f.read_to_string(&mut buffer).unwrap(); + assert_eq!(buffer, "Downloaded content"); + } + + #[test] + fn download_fail_when_resource_not_exist() { + struct TestHandler; + impl ContentRetriever for TestHandler { + fn retrieve(&self, url: &str) -> Result<BoxRead, Error> { + Err(Error::AssetFileNotFound(format!( + "Missing remote resource: {url}" + ))) + } + } + let cr = TestHandler {}; + let a = temp_remote_asset("https://mdbook-epub.org/not-exist.svg").unwrap(); + let r = cr.download(&a); + + assert!(r.is_err()); + assert!(matches!(r.unwrap_err(), Error::AssetFileNotFound(_))); + } + + #[test] + #[should_panic(expected = "NOT 200 or 404")] + fn download_fail_with_unexpected_status() { + struct TestHandler; + impl ContentRetriever for TestHandler { + fn retrieve(&self, _url: &str) -> Result<BoxRead, Error> { + panic!("NOT 200 or 404") + } + } + let cr = TestHandler {}; + let a = temp_remote_asset("https://mdbook-epub.org/bad.svg").unwrap(); + let r = cr.download(&a); + +
panic!("{}", r.unwrap_err().to_string()); + } + + fn temp_remote_asset(url: &str) -> Result { + let dest_dir = TempDir::new("mdbook-epub")?; + Asset::from_url(url::Url::parse(url).unwrap(), dest_dir.path()) + } +} diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index e1a89f909..e73d6d08b 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1,25 +1,26 @@ -use ::epub; -use ::mdbook; -use ::mdbook_epub; -use ::tempdir; -use std::env; -use std::fs::File; -use std::io::BufReader; - #[macro_use] extern crate log; #[macro_use] extern crate serial_test; -use epub::doc::EpubDoc; -use mdbook::renderer::RenderContext; -use mdbook::MDBook; -use mdbook_epub::Error; +use std::env; +use std::fs::File; +use std::io::BufReader; use std::path::{Path, PathBuf}; use std::process::Command; use std::sync::Once; + +use ::epub; +use ::mdbook; +use ::tempdir; +use epub::doc::EpubDoc; +use mdbook::renderer::RenderContext; +use mdbook::MDBook; use tempdir::TempDir; +use ::mdbook_epub; +use mdbook_epub::errors::Error; + static INIT: Once = Once::new(); fn init_logging() { From 9841c69c1e3ea151deba2c9ac7ed4f733dbfa90b Mon Sep 17 00:00:00 2001 From: Yuriy Larin Date: Wed, 7 Feb 2024 18:38:50 +0200 Subject: [PATCH 2/3] renamed resources file --- src/generator.rs | 4 ++-- src/resources.rs | 2 +- src/resources/asset.rs | 2 +- src/resources/{resources.rs => resource.rs} | 0 4 files changed, 4 insertions(+), 4 deletions(-) rename src/resources/{resources.rs => resource.rs} (100%) diff --git a/src/generator.rs b/src/generator.rs index cec2fa9e6..a8c281044 100644 --- a/src/generator.rs +++ b/src/generator.rs @@ -18,7 +18,7 @@ use url::Url; use crate::config::Config; use crate::resources::retrieve::{ContentRetriever, ResourceHandler}; -use crate::resources::resources::{self}; +use crate::resources::resource::{self}; use crate::DEFAULT_CSS; use crate::{Error, utils}; use crate::resources::asset::{Asset, AssetKind}; @@ -114,7 +114,7 @@ impl<'a> Generator<'a> { 
let error = String::from("Failed finding/fetch resource taken from content? Look up content for possible error..."); // resources::find can emit very unclear error based on internal MD content, // so let's give a tip to user in error message - let assets = resources::find(self.ctx).map_err(|e| { + let assets = resource::find(self.ctx).map_err(|e| { error!("{} Caused by: {}", error, e); e })?; diff --git a/src/resources.rs b/src/resources.rs index ee3cc0821..0b7f252cd 100644 --- a/src/resources.rs +++ b/src/resources.rs @@ -1,3 +1,3 @@ -pub(crate) mod resources; +pub(crate) mod resource; pub(crate) mod retrieve; pub(crate) mod asset; \ No newline at end of file diff --git a/src/resources/asset.rs b/src/resources/asset.rs index 9721a1a35..57e9c5879 100644 --- a/src/resources/asset.rs +++ b/src/resources/asset.rs @@ -2,7 +2,7 @@ use std::path::{MAIN_SEPARATOR_STR, Path, PathBuf}; use url::Url; use mime_guess::Mime; use crate::errors::Error; -use crate::resources::resources::{UPPER_FOLDER_PATHS, UPPER_PARENT, UPPER_PARENT_LINUX, UPPER_PARENT_STARTS_SLASH, UPPER_PARENT_STARTS_SLASH_LINUX}; +use crate::resources::resource::{UPPER_FOLDER_PATHS, UPPER_PARENT, UPPER_PARENT_LINUX, UPPER_PARENT_STARTS_SLASH, UPPER_PARENT_STARTS_SLASH_LINUX}; use crate::utils; /// The type of asset, remote or local diff --git a/src/resources/resources.rs b/src/resources/resource.rs similarity index 100% rename from src/resources/resources.rs rename to src/resources/resource.rs From 4f513d4e90dafb7981356a25f5769eb44ac1c76c Mon Sep 17 00:00:00 2001 From: Yuriy Larin Date: Wed, 7 Feb 2024 19:07:16 +0200 Subject: [PATCH 3/3] bumped to '0.4.37' mdbook --- Cargo.lock | 196 ++++++++++++++++++++++++++++++++++++++++++++++------- Cargo.toml | 4 +- 2 files changed, 172 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d92898e1c..459979037 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -44,17 +44,31 @@ dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", - "anstyle-wincon", + 
"anstyle-wincon 1.0.1", "colorchoice", "is-terminal", "utf8parse", ] +[[package]] +name = "anstream" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon 3.0.2", + "colorchoice", + "utf8parse", +] + [[package]] name = "anstyle" -version = "1.0.0" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anstyle-parse" @@ -84,6 +98,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + [[package]] name = "anyhow" version = "1.0.71" @@ -119,6 +143,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + [[package]] name = "block-buffer" version = "0.10.4" @@ -186,7 +216,7 @@ checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", - "bitflags", + "bitflags 1.3.2", "strsim 0.8.0", "textwrap", "unicode-width", @@ -208,7 +238,7 @@ version = "4.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f2763db829349bf00cfc06251268865ed4363b93a943174f638daf3ecdba2cd" dependencies = [ - "anstream", + "anstream 0.3.2", "anstyle", "clap_lex", 
"once_cell", @@ -353,6 +383,16 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +[[package]] +name = "env_filter" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +dependencies = [ + "log", + "regex", +] + [[package]] name = "env_logger" version = "0.10.0" @@ -366,6 +406,19 @@ dependencies = [ "termcolor", ] +[[package]] +name = "env_logger" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05e7cf40684ae96ade6232ed84582f40ce0a66efcd43a5117aef610534f8e0b8" +dependencies = [ + "anstream 0.6.11", + "anstyle", + "env_filter", + "humantime", + "log", +] + [[package]] name = "epub" version = "2.1.1" @@ -597,6 +650,20 @@ dependencies = [ "thiserror", ] +[[package]] +name = "handlebars" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab283476b99e66691dee3f1640fea91487a8d81f50fb5ecc75538f8f8879a1e4" +dependencies = [ + "log", + "pest", + "pest_derive", + "serde", + "serde_json", + "thiserror", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -791,22 +858,21 @@ dependencies = [ [[package]] name = "mdbook" -version = "0.4.36" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80992cb0e05f22cc052c99f8e883f1593b891014b96a8b4637fd274d7030c85e" +checksum = "0c33564061c3c640bed5ace7d6a2a1b65f2c64257d1ac930c15e94ed0fb561d3" dependencies = [ "anyhow", "chrono", "clap 4.3.12", "clap_complete", - "env_logger", - "handlebars", + "env_logger 0.11.1", + "handlebars 5.1.0", "log", "memchr", "once_cell", "opener", - "pathdiff", - "pulldown-cmark", + "pulldown-cmark 0.10.0", "regex", "serde", "serde_json", @@ -818,20 +884,20 @@ dependencies = [ [[package]] name = "mdbook-epub" -version = "0.4.36" +version = 
"0.4.37" dependencies = [ "const_format", - "env_logger", + "env_logger 0.10.0", "epub", "epub-builder", "eyre", - "handlebars", + "handlebars 4.3.7", "html_parser", "log", "mdbook", "mime_guess", "mockall", - "pulldown-cmark", + "pulldown-cmark 0.9.3", "semver", "serde", "serde_derive", @@ -977,12 +1043,6 @@ dependencies = [ "windows-targets 0.48.0", ] -[[package]] -name = "pathdiff" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" - [[package]] name = "percent-encoding" version = "2.3.0" @@ -1114,12 +1174,30 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77a1a2f1f0a7ecff9c31abbe177637be0e97a0aef46cf8738ece09327985d998" dependencies = [ - "bitflags", + "bitflags 1.3.2", "getopts", "memchr", "unicase", ] +[[package]] +name = "pulldown-cmark" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce76ce678ffc8e5675b22aa1405de0b7037e2fdf8913fea40d1926c6fe1e6e7" +dependencies = [ + "bitflags 2.4.2", + "memchr", + "pulldown-cmark-escape", + "unicase", +] + +[[package]] +name = "pulldown-cmark-escape" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5d8f9aa0e3cbcfaf8bf00300004ee3b72f74770f9cbac93f6928771f613276b" + [[package]] name = "quote" version = "1.0.31" @@ -1172,7 +1250,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -1228,7 +1306,7 @@ version = "0.37.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" dependencies = [ - "bitflags", + "bitflags 1.3.2", "errno", "io-lifetimes", "libc", @@ -1345,9 +1423,9 @@ dependencies = [ 
[[package]] name = "shlex" -version = "1.1.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "slab" @@ -1836,6 +1914,15 @@ dependencies = [ "windows-targets 0.48.0", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + [[package]] name = "windows-targets" version = "0.42.2" @@ -1866,6 +1953,21 @@ dependencies = [ "windows_x86_64_msvc 0.48.0", ] +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -1878,6 +1980,12 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" @@ -1890,6 +1998,12 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -1902,6 +2016,12 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + [[package]] name = "windows_i686_msvc" version = "0.42.2" @@ -1914,6 +2034,12 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -1926,6 +2052,12 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -1938,6 +2070,12 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + [[package]] name = "windows_x86_64_msvc" version 
= "0.42.2" @@ -1950,6 +2088,12 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + [[package]] name = "xml-rs" version = "0.8.14" diff --git a/Cargo.toml b/Cargo.toml index 33f75b3f0..3b1cad0fa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ authors = ["Michael Bryan "] description = "An EPUB renderer for mdbook." name = "mdbook-epub" -version = "0.4.36" +version = "0.4.37" readme = "README.md" license = "MIT" repository = "https://github.com/Michael-F-Bryan/mdbook-epub" @@ -35,7 +35,7 @@ structopt = "0.3.26" mime_guess = "2.0" env_logger = "0.10.0" log = "0.4.17" -mdbook = { version = "0.4.36", default-features = false } +mdbook = { version = "0.4.37", default-features = false } handlebars = "4.3.7" toml = "0.5.11" # downgraded due to parent 'mdbook' dependency and error there html_parser = "0.7.0"