Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for --dump-inputs #1159

Merged
merged 8 commits into from
Jul 16, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,9 @@ Options:
--dump
Don't perform any link checking. Instead, dump all the links extracted from inputs that would be checked

--dump-inputs
Don't perform any link collection and checking. Instead, dump all input sources from which links would be collected
Techassi marked this conversation as resolved.
Show resolved Hide resolved

--archive <ARCHIVE>
Specify the use of a specific web archive. Can be used in combination with `--suggest`

Expand Down
28 changes: 26 additions & 2 deletions lychee-bin/src/commands/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ where
let requests = params.requests;
tokio::pin!(requests);

if let Some(outfile) = &params.cfg.output {
fs::File::create(outfile)?;
if let Some(out_file) = &params.cfg.output {
fs::File::create(out_file)?;
}

let mut writer = create_writer(params.cfg.output)?;
Expand Down Expand Up @@ -70,6 +70,30 @@ where
Ok(ExitCode::Success)
}

/// Dump all input sources to stdout without detecting any links and checking
Techassi marked this conversation as resolved.
Show resolved Hide resolved
/// them.
///
/// Every source yielded by `sources` is written on its own line to the writer
/// returned by `create_writer` for the given `output`.
///
/// # Errors
///
/// Returns an error if the output file cannot be created, if the stream
/// yields an error, or if writing to or flushing the writer fails.
pub(crate) async fn dump_inputs<S>(sources: S, output: Option<&PathBuf>) -> Result<ExitCode>
where
    S: futures::Stream<Item = Result<String>>,
{
    // Pin the stream in place so it can be polled with `next()` below.
    tokio::pin!(sources);

    // Create (or truncate) the output file before the writer is opened.
    if let Some(out_file) = output {
        fs::File::create(out_file)?;
    }

    let mut writer = create_writer(output.cloned())?;

    while let Some(source) = sources.next().await {
        // Abort on the first error produced by the stream.
        let source = source?;

        writeln!(writer, "{source}")?;
    }

    // Flush explicitly: relying on `Drop` would silently discard any error
    // raised while writing out buffered data.
    writer.flush()?;

    Ok(ExitCode::Success)
}

/// Dump request to stdout
fn write(
writer: &mut Box<dyn Write>,
Expand Down
1 change: 1 addition & 0 deletions lychee-bin/src/commands/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub(crate) mod dump;

pub(crate) use check::check;
pub(crate) use dump::dump;
pub(crate) use dump::dump_inputs;

use std::sync::Arc;

Expand Down
7 changes: 7 additions & 0 deletions lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,13 @@ async fn run(opts: &LycheeOptions) -> Result<i32> {
// File a bug if you rely on this envvar! It's going to go away eventually.
.use_html5ever(std::env::var("LYCHEE_USE_HTML5EVER").map_or(false, |x| x == "1"));

if opts.config.dump_inputs {
let sources = collector.collect_sources(inputs).await;
let exit_code = commands::dump_inputs(sources, opts.config.output.as_ref()).await?;

return Ok(exit_code as i32);
}

collector = if let Some(ref basic_auth) = opts.config.basic_auth {
collector.basic_auth_extractor(BasicAuthExtractor::new(basic_auth)?)
} else {
Expand Down
6 changes: 6 additions & 0 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ pub(crate) struct Config {
#[serde(default)]
pub(crate) dump: bool,

/// Don't perform any link collection and checking.
Techassi marked this conversation as resolved.
Show resolved Hide resolved
/// Instead, dump all input sources from which links would be collected
#[arg(long)]
#[serde(default)]
pub(crate) dump_inputs: bool,

/// Specify the use of a specific web archive.
/// Can be used in combination with `--suggest`
#[arg(long, value_parser = clap::builder::PossibleValuesParser::new(Archive::VARIANTS).map(|s| s.parse::<Archive>().unwrap()))]
Expand Down
8 changes: 8 additions & 0 deletions lychee-lib/src/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ impl Collector {
self
}

/// Gather the sources of every given [`Input`] into a single stream.
///
/// Each input is expanded with
/// [`Input::get_sources`](crate::Input#method.get_sources) and the resulting
/// per-input streams are flattened into one stream of source strings.
pub async fn collect_sources(self, inputs: Vec<Input>) -> impl Stream<Item = Result<String>> {
    // Expands a single input into its own stream of sources.
    let expand = move |input: Input| async move { input.get_sources().await };

    stream::iter(inputs)
        .par_then_unordered(None, expand)
        .flatten()
}

/// Fetch all unique links from inputs
/// All relative URLs get prefixed with `base` (if given).
/// (This can be a directory or a base URL)
Expand Down
34 changes: 34 additions & 0 deletions lychee-lib/src/types/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,40 @@ impl Input {
}
}

/// Retrieve all sources from this input. The output depends on the type of
/// input:
///
/// - Remote URLs are returned as is, in their full form
/// - Filepath Glob Patterns are expanded and each matched entry is returned
/// - Absolute or relative filepaths are returned as is
/// - All other input types are not returned
Techassi marked this conversation as resolved.
Show resolved Hide resolved
///
/// # Errors
///
/// Returns an error if the globbing fails with the expanded pattern.
pub async fn get_sources(self) -> impl Stream<Item = Result<String>> {
    try_stream! {
        match self.source {
            // Remote URLs are emitted in their string form.
            InputSource::RemoteUrl(url) => {
                yield url.to_string();
            }
            // Plain file paths are emitted as given (lossily converted to UTF-8).
            InputSource::FsPath(path) => {
                yield path.to_string_lossy().into_owned();
            }
            // Glob patterns are tilde-expanded and every matching entry is emitted.
            InputSource::FsGlob { pattern, ignore_case } => {
                let expanded_pattern = tilde(&pattern).to_string();
                let options = glob::MatchOptions {
                    case_sensitive: !ignore_case,
                    ..glob::MatchOptions::new()
                };

                for matched in glob_with(&expanded_pattern, options)? {
                    match matched {
                        Ok(path) => {
                            yield path.to_string_lossy().into_owned();
                        }
                        // A failing entry is reported on stderr and skipped;
                        // the remaining matches are still emitted.
                        Err(e) => eprintln!("{e:?}"),
                    }
                }
            }
            // Every other input kind yields nothing.
            _ => {}
        }
    }
}

async fn url_contents(url: &Url) -> Result<InputContent> {
// Assume HTML for default paths
let file_type = if url.path().is_empty() || url.path() == "/" {
Expand Down