Skip to content

Commit

Permalink
Add flag to force html mode
Browse files Browse the repository at this point in the history
This attempts to address lycheeverse#671 by adding a `--html` flag that forces
the input to be parsed as HTML. Without the flag, STDIN and local files that
lack the `.html` suffix are parsed as plain text.
  • Loading branch information
nacnudus committed Jul 1, 2022
1 parent fb367ef commit 1ef782e
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 9 deletions.
3 changes: 3 additions & 0 deletions fixtures/configs/smoketest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ include_verbatim = false
# Ignore case of paths when matching glob patterns.
glob_ignore_case = false

# Treat the input as HTML.
html = false

# Exclude URLs from checking (supports regex).
exclude = [ '.*\.github.com\.*' ]

Expand Down
17 changes: 14 additions & 3 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use crate::parse::{parse_base, parse_statuscodes};
use anyhow::{anyhow, Context, Error, Result};
use const_format::{concatcp, formatcp};
use lychee_lib::{
Base, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, DEFAULT_RETRY_WAIT_TIME_SECS,
DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
Base, Input, FileType, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,
DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
};
use secrecy::{ExposeSecret, SecretString};
use serde::Deserialize;
Expand Down Expand Up @@ -122,14 +122,19 @@ impl LycheeOptions {
// but we'd get no access to `glob_ignore_case`.
/// Get parsed inputs from options.
///
/// Returns an error (with the context "Cannot parse inputs from arguments")
/// if any `Input::new` call fails.
pub(crate) fn inputs(&self) -> Result<Vec<Input>> {
// When the `html` option is set, every input gets an explicit HTML hint;
// otherwise no hint is passed along.
let file_type_hint = if self.config.html {
Some(FileType::Html)
} else {
None
};
// An empty `exclude_path` list is forwarded as `None` instead of an empty collection.
let excluded = if self.config.exclude_path.is_empty() {
None
} else {
Some(self.config.exclude_path.clone())
};
// Build one `Input` per raw argument; collecting into `Result` stops at the first error.
self.raw_inputs
.iter()
// NOTE(review): the next two `.map` lines look like the removed/added pair of the
// commit diff (`None` replaced by `file_type_hint`) — confirm against the real file.
.map(|s| Input::new(s, None, self.config.glob_ignore_case, excluded.clone()))
.map(|s| Input::new(s, file_type_hint, self.config.glob_ignore_case, excluded.clone()))
.collect::<Result<_, _>>()
.context("Cannot parse inputs from arguments")
}
Expand Down Expand Up @@ -319,6 +324,11 @@ pub(crate) struct Config {
#[serde(default)]
pub(crate) glob_ignore_case: bool,

/// Treat the input as HTML
#[structopt(long)]
#[serde(default)]
pub(crate) html: bool,

/// Output file of status report
#[structopt(short, long, parse(from_os_str))]
#[serde(default)]
Expand Down Expand Up @@ -393,6 +403,7 @@ impl Config {
skip_missing: false;
include_verbatim: false;
glob_ignore_case: false;
html: false;
output: None;
require_https: false;
}
Expand Down
23 changes: 17 additions & 6 deletions lychee-lib/src/types/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{helpers, ErrorKind, Result};
use async_stream::try_stream;
use futures::stream::Stream;
use glob::glob_with;
use jwalk::WalkDir;
use jwalk::WalkDirGeneric;
use reqwest::Url;
use serde::Serialize;
use shellexpand::tilde;
Expand Down Expand Up @@ -198,7 +198,7 @@ impl Input {
}
InputSource::FsPath(ref path) => {
if path.is_dir() {
for entry in WalkDir::new(path).skip_hidden(true)
for entry in WalkDirGeneric::<((usize), (Option<FileType>))>::new(path).skip_hidden(true)
.process_read_dir(move |_, _, _, children| {
children.retain(|child| {
let entry = match child.as_ref() {
Expand All @@ -224,19 +224,24 @@ impl Input {
}
return valid_extension(&entry.path());
});
children.first_mut().map(|child| {
if let Ok(entry) = child {
entry.client_state = self.file_type_hint;
}
});
}) {
let entry = entry?;
if entry.file_type().is_dir() {
continue;
}
let content = Self::path_content(entry.path()).await?;
let content = Self::path_content(entry.path(), entry.client_state).await?;
yield content
}
} else {
if self.is_excluded_path(path) {
return ();
}
let content = Self::path_content(path).await;
let content = Self::path_content(path, self.file_type_hint).await;
match content {
Err(_) if skip_missing => (),
Err(e) => Err(e)?,
Expand Down Expand Up @@ -301,7 +306,7 @@ impl Input {
if self.is_excluded_path(&path) {
continue;
}
let content: InputContent = Self::path_content(&path).await?;
let content: InputContent = Self::path_content(&path, self.file_type_hint).await?;
yield content;
}
Err(e) => eprintln!("{e:?}"),
Expand All @@ -325,13 +330,19 @@ impl Input {
/// Will return `Err` if file contents can't be read
pub async fn path_content<P: Into<PathBuf> + AsRef<Path> + Clone>(
path: P,
file_type_hint: Option<FileType>,
) -> Result<InputContent> {
let path = path.into();
let content = tokio::fs::read_to_string(&path)
.await
.map_err(|e| ErrorKind::ReadFileInput(e, path.clone()))?;
let file_type = if file_type_hint.is_none() {
FileType::from(&path)
} else {
file_type_hint.unwrap_or_default()
};
let input_content = InputContent {
file_type: FileType::from(&path),
file_type: file_type,
source: InputSource::FsPath(path),
content,
};
Expand Down
3 changes: 3 additions & 0 deletions lychee.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ include_verbatim = false
# Ignore case of paths when matching glob patterns.
glob_ignore_case = false

# Treat the input as HTML.
html = false

# Exclude URLs from checking (supports regex).
exclude = [ '.*\.github.com\.*' ]

Expand Down

0 comments on commit 1ef782e

Please sign in to comment.