diff --git a/src/config.rs b/src/config.rs index 8147a17..4bdcc91 100644 --- a/src/config.rs +++ b/src/config.rs @@ -14,6 +14,7 @@ pub struct Config pub DEBUG: Option, // debug mode? pub DONTDOWNLOADME_FILEPATH: Option, // path to file containing hentai ID to not download, blacklist pub DOWNLOADME_FILEPATH: Option, // path to file containing hentai ID to download + pub FALLBACK_TO_ARCHIVE_ORG: Option, // allow pull from archive.org? false by default pub LIBRARY_PATH: String, // path to download hentai to pub LIBRARY_SPLIT: Option, // split library into subdirectories of maximum this many hentai, None or 0 to disable pub NHENTAI_TAGS: Option>, // keep creating downloadme.txt from these tags and keep downloading (server mode), normal tags are in format "tag:{tag}" for example "tag:ffm-threesome"; if None: don't generate downloadme.txt, download hentai once (client mode) @@ -39,6 +40,7 @@ impl Default for Config NHENTAI_TAGS: None, SLEEP_INTERVAL: Some(50000), USER_AGENT: Some("".to_owned()), + FALLBACK_TO_ARCHIVE_ORG: None, } } } \ No newline at end of file diff --git a/src/hentai.rs b/src/hentai.rs index 05ebec8..2820c75 100644 --- a/src/hentai.rs +++ b/src/hentai.rs @@ -130,10 +130,11 @@ impl Hentai /// # Arguments /// - `http_client`: reqwest http client /// - `db`: database connectionc + /// - `webarchive`: Download from web archive? False by default. /// /// # Returns /// - nothing or error - pub async fn download(&self, http_client: &reqwest::Client, cleanup_temporary_files: bool) -> Result<(), HentaiDownloadError> + pub async fn download(&self, http_client: &reqwest::Client, cleanup_temporary_files: bool, webarchive: bool) -> Result<(), HentaiDownloadError> { const WORKERS: usize = 5; // number of parallel workers let cbz_final_filepath: String; //filepath to final cbz in library @@ -218,6 +219,44 @@ impl Hentai } if image_download_success {break;} // if all images were downloaded successfully: continue with cbz creation } + if !image_download_success && webarchive == true { // Web Archive Loop + image_download_success = true; // assume success + handles = Vec::new(); // reset handles + + for i in 0..self.images_url.len() // for each page + { + let f_clone: scaler::Formatter = f.clone(); + let http_client_clone: reqwest::Client = http_client.clone(); + let image_filepath: String = format!("{}{}/{}", self.library_path, self.id, self.images_filename.get(i).expect("Index out of bounds even though should have same size as images_url.")); + let image_url_clone: String = self.images_url.get(i).expect("Index out of bounds even though checked before that it fits.").clone(); + let num_pages_clone: u16 = self.num_pages; + + let permit: tokio::sync::OwnedSemaphorePermit = worker_sem.clone().acquire_owned().await.expect("Something closed semaphore even though it should never be closed."); // acquire semaphore + handles.push(tokio::spawn(async move + { + let result: Option<()>; + match Self::archive_image(&http_client_clone, &image_url_clone, &image_filepath).await // download image + { + Ok(_) => + { + log::debug!("Downloaded hentai image {} / {}.", f_clone.format((i+1) as f64), f_clone.format(num_pages_clone as f64)); + result = Some(()); // success + } + Err(e) => + { + log::warn!("{e}"); + result = None; // failure + } + } + drop(permit); // release semaphore + result // return result into handle + })); // search all pages in parallel + } + for handle in handles + { + if handle.await.unwrap().is_none() {image_download_success = false;} // collect results, forward panics, abort so we don't needlessly spam IA on a set that won't download + } + } if !image_download_success {return Err(HentaiDownloadError::Download {})}; // if after 5 attempts still not all images downloaded successfully: give up log::info!("Downloaded hentai images."); @@ -331,6 +370,7 @@ impl Hentai let mut r: reqwest::Response = http_client.get(image_url).send().await?; // tag search on general media server, page + if r.status() != reqwest::StatusCode::OK // if status not ok: retry with other media servers { for media_server in MEDIA_SERVERS // try all media servers @@ -341,6 +381,69 @@ impl Hentai if r.status() == reqwest::StatusCode::OK {break;} // if not ok: try again } } + + if r.status() != reqwest::StatusCode::OK {return Err(HentaiDownloadImageError::ReqwestStatus {url: image_url.to_owned(), status: r.status()});} // if status still not ok: something went wrong + + let mut file: tokio::fs::File; + #[cfg(target_family = "unix")] + { + if let Some(parent) = std::path::Path::new(image_filepath).parent() // create all parent directories with permissions "drwxrwxrwx" + { + if let Err(e) = tokio::fs::DirBuilder::new().recursive(true).mode(0o777).create(parent).await + { + return Err(HentaiDownloadImageError::StdIo {filepath: image_filepath.to_owned(), source: e}); + } + } + match tokio::fs::OpenOptions::new().create_new(true).mode(0o666).write(true).open(image_filepath).await + { + Ok(o) => file = o, + Err(e) => {return Err(HentaiDownloadImageError::StdIo {filepath: image_filepath.to_owned(), source: e});} + } + } + #[cfg(not(target_family = "unix"))] + { + if let Some(parent) = std::path::Path::new(image_filepath).parent() // create all parent directories + { + if let Err(e) = tokio::fs::DirBuilder::new().recursive(true).create(parent).await + { + return Err(HentaiDownloadImageError::StdIo {filepath: image_filepath.to_owned(), source: e}); + } + } + match tokio::fs::OpenOptions::new().create_new(true).write(true).open(image_filepath).await + { + Ok(o) => file = o, + Err(e) => {return Err(HentaiDownloadImageError::StdIo {filepath: image_filepath.to_owned(), source: e});} + } + } + + if let Err(e) = file.write_all_buf(&mut r.bytes().await?).await // save image with permissions "rw-rw-rw-" + { + return Err(HentaiDownloadImageError::StdIo {filepath: image_filepath.to_owned(), source: e}); + } + + return Ok(()); + } + + async fn archive_image(http_client: &reqwest::Client, image_url: &str, image_filepath: &str) -> Result<(), HentaiDownloadImageError> + { + + if let Ok(o) = tokio::fs::metadata(image_filepath).await + { + if o.is_file() {return Ok(());} // if image already exists: skip download + if o.is_dir() {return Err(HentaiDownloadImageError::BlockedByDirectory {directory_path: image_filepath.to_owned()});} // if image filepath blocked by directory: give up + } + + + let mut r: reqwest::Response = http_client.get(image_url).send().await?; // tag search on general media server, page + + if r.status() != reqwest::StatusCode::OK // if status not ok: retry with other media servers + { + log::warn!("Pulling from the Internet Archive: {image_url}"); + log::debug!("{}", image_url.replace("https://i.nhentai.net", format!("https://web.archive.org/web/00000000000000if_/https://i.nhentai.net").as_str())); + r = http_client.get(image_url.replace("https://i.nhentai.net", format!("https://web.archive.org/web/00000000000000if_/https://i.nhentai.net").as_str())).send().await?; // tag search, page, insert media server + log::debug!("{}", r.status()); + } + if r.status() != reqwest::StatusCode::OK {return Err(HentaiDownloadImageError::ReqwestStatus {url: image_url.to_owned(), status: r.status()});} // if status still not ok: something went wrong let mut file: tokio::fs::File; diff --git a/src/main_inner.rs b/src/main_inner.rs index 3c4ded5..4a61cf8 100644 --- a/src/main_inner.rs +++ b/src/main_inner.rs @@ -114,7 +114,7 @@ pub async fn main_inner(config: Config) -> Result<(), Error> } } - if let Err(e) = hentai.download(&http_client, config.CLEANUP_TEMPORARY_FILES.unwrap_or(true)).await + if let Err(e) = hentai.download(&http_client, config.CLEANUP_TEMPORARY_FILES.unwrap_or(true), config.FALLBACK_TO_ARCHIVE_ORG.unwrap_or(false)).await { log::error!{"{e}"}; }