Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for ranges in the --accept option / config field #1167

Merged
merged 23 commits into from
Sep 17, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8528b8a
Foundation work for accept ranges
Techassi Jul 17, 2023
5e58638
Apply suggestions
Techassi Jul 17, 2023
c11e41d
Cleanup suggestions, fix clippy errors
Techassi Jul 17, 2023
7866393
Add more tests, fix edge cases
Techassi Jul 17, 2023
f3a7101
Fix clippy error
Techassi Jul 17, 2023
253d212
Merge remote-tracking branch 'origin/master' into feat/accept-range-s…
Techassi Jul 17, 2023
6645927
Integrate accept selector into CLI
Techassi Aug 1, 2023
48b2f82
Merge remote-tracking branch 'origin/master' into feat/accept-range-s…
Techassi Aug 1, 2023
4f45d52
Fix clippy errors
Techassi Aug 1, 2023
32f654a
Add support for comma-separated string and sequence of strings for ac…
Techassi Aug 6, 2023
f8aa68b
Merge branch 'master' into feat/accept-range-selectors
mre Aug 17, 2023
4bdf33d
Merge branch 'master' into feat/accept-range-selectors
mre Aug 22, 2023
53323f4
Implement `Default` and `Display` for `AcceptSelector`
Techassi Sep 3, 2023
f352a78
Update deps
Techassi Sep 3, 2023
e1a6aca
Fix cargo fmt errors
Techassi Sep 3, 2023
939d044
Merge branch 'master' into feat/accept-range-selectors
Techassi Sep 3, 2023
359bb2e
Fix clippy errors
Techassi Sep 3, 2023
ed2faab
Fix tests
Techassi Sep 9, 2023
1dbb9ad
Merge branch 'master' into feat/accept-range-selectors
Techassi Sep 9, 2023
1e43493
Print more specific error message when parsing TOML config
mre Sep 9, 2023
67fa627
Set serde default for `AcceptSelector`
mre Sep 9, 2023
430a29b
Merge branch 'master' into feat/accept-range-selectors
Techassi Sep 17, 2023
24e69c8
Fix tests
Techassi Sep 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions fixtures/configs/smoketest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ timeout = 20
retry_wait_time = 2

# Comma-separated list of accepted status codes for valid links.
accept = [200, 429]
accept = "200, 429"
Techassi marked this conversation as resolved.
Show resolved Hide resolved

# Proceed for server connections considered insecure (invalid TLS).
insecure = false

# Only test links with the given schemes (e.g. https).
# Omit to check links with any scheme.
scheme = [ "https" ]
scheme = ["https"]

# When links are available using HTTPS, treat HTTP links as errors.
require_https = false
Expand All @@ -66,7 +66,7 @@ headers = []

# Remap URI matching pattern to different URI.
# This also supports (named) capturing groups.
remap = [
remap = [
"https://example.com http://example.invalid",
"https://example.com/(.*) http://example.org/$1",
"https://github.com/(?P<org>.*)/(?P<repo>.*) https://gitlab.com/$org/$repo",
Expand All @@ -93,13 +93,13 @@ include_verbatim = false
glob_ignore_case = false

# Exclude URLs and mail addresses from checking (supports regex).
exclude = [ '.*\.github.com\.*' ]
exclude = ['.*\.github.com\.*']

# Exclude these filesystem paths from getting checked.
exclude_path = ["file/path/to/Ignore", "./other/file/path/to/Ignore"]

# URLs to check (supports regex). Has preference over all excludes.
include = [ 'gist\.github\.com.*' ]
include = ['gist\.github\.com.*']

# Exclude all private IPs from checking.
# Equivalent to setting `exclude_private`, `exclude_link_local`, and
Expand Down
15 changes: 8 additions & 7 deletions lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
cfg.scheme.clone()
};

let accepted = match cfg.accept {
Some(ref accepted) => {
let accepted: Result<HashSet<_>, _> = accepted
let accepted = match &cfg.accept {
Some(selector) => Some(
selector
.clone()
.into_set()
.iter()
.map(|code| StatusCode::from_u16(*code))
.collect();
Some(accepted?)
}
.map(|value| StatusCode::from_u16(*value))
.collect::<Result<HashSet<_>, _>>()?,
),
None => None,
};

Expand Down
5 changes: 4 additions & 1 deletion lychee-bin/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ where

let client = params.client;
let cache = params.cache;
let accept = params.cfg.accept;
let accept = match params.cfg.accept {
Some(selector) => Some(selector.into_set()),
None => None,
};

let pb = if params.cfg.no_progress || params.cfg.verbose.log_level() >= log::Level::Info {
None
Expand Down
10 changes: 5 additions & 5 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
use crate::archive::Archive;
use crate::parse::{parse_base, parse_statuscodes};
use crate::parse::parse_base;
use crate::verbosity::Verbosity;
use anyhow::{anyhow, Context, Error, Result};
use clap::{arg, builder::TypedValueParser, Parser};
use const_format::{concatcp, formatcp};
use lychee_lib::{
Base, BasicAuthSelector, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,
AcceptSelector, Base, BasicAuthSelector, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,
DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
};
use secrecy::{ExposeSecret, SecretString};
use serde::Deserialize;
use std::path::Path;
use std::{collections::HashSet, fs, path::PathBuf, str::FromStr, time::Duration};
use std::{fs, path::PathBuf, str::FromStr, time::Duration};
use strum::VariantNames;

pub(crate) const LYCHEE_IGNORE_FILE: &str = ".lycheeignore";
Expand Down Expand Up @@ -297,9 +297,9 @@ pub(crate) struct Config {
pub(crate) header: Vec<String>,

/// Comma-separated list of accepted status codes for valid links
#[arg(short, long, value_parser = parse_statuscodes)]
#[arg(short, long)]
#[serde(default)]
pub(crate) accept: Option<HashSet<u16>>,
pub(crate) accept: Option<AcceptSelector>,

/// Website timeout in seconds from connect to response finished
#[arg(short, long, default_value = &TIMEOUT_STR)]
Expand Down
25 changes: 1 addition & 24 deletions lychee-bin/src/parse.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use anyhow::{anyhow, Context, Result};
use headers::{HeaderMap, HeaderName};
use lychee_lib::{remap::Remaps, Base};
use std::{collections::HashSet, time::Duration};
use std::time::Duration;

/// Split a single HTTP header into a (key, value) tuple
fn read_header(input: &str) -> Result<(String, String)> {
Expand Down Expand Up @@ -40,24 +40,8 @@ pub(crate) fn parse_base(src: &str) -> Result<Base, lychee_lib::ErrorKind> {
Base::try_from(src)
}

/// Parse a comma-separated list of HTTP status codes into a set of `u16` values.
///
/// The codes are deliberately kept as plain `u16` rather than converted into
/// `StatusCode`: this avoids the need for `http` as a dependency and supports
/// custom status codes, which are necessary for some websites that don't
/// adhere to the HTTP spec or IANA.
///
/// Duplicate codes collapse into a single set entry.
///
/// # Errors
///
/// Returns an error as soon as one comma-separated piece fails to parse as a
/// `u16`. Pieces are not trimmed, so surrounding whitespace (as in
/// `"200, 429"`) and empty pieces (an empty input or a trailing comma) are
/// rejected.
pub(crate) fn parse_statuscodes(accept: &str) -> Result<HashSet<u16>> {
let mut statuscodes = HashSet::new();
for code in accept.split(',') {
// `?` aborts on the first piece that is not a valid `u16`.
let code: u16 = code.parse::<u16>()?;
statuscodes.insert(code);
}
Ok(statuscodes)
}

#[cfg(test)]
mod tests {
use std::collections::HashSet;

use headers::HeaderMap;
use regex::Regex;
Expand All @@ -72,13 +56,6 @@ mod tests {
assert_eq!(parse_headers(&["accept=text/html"]).unwrap(), custom);
}

// Three comma-separated codes (no spaces) must yield exactly those three values.
#[test]
fn test_parse_statuscodes() {
let actual = parse_statuscodes("200,204,301").unwrap();
// Build the expected set from an array so the comparison is order-independent.
let expected = IntoIterator::into_iter([200, 204, 301]).collect::<HashSet<_>>();
assert_eq!(actual, expected);
}

#[test]
fn test_parse_remap() {
let remaps =
Expand Down
1 change: 1 addition & 0 deletions lychee-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ doc-comment = "0.3.3"
tempfile = "3.6.0"
wiremock = "0.5.19"
serde_json = "1.0.103"
rstest = "0.18.1"

[features]

Expand Down
6 changes: 3 additions & 3 deletions lychee-lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ pub use crate::{
collector::Collector,
filter::{Excludes, Filter, Includes},
types::{
uri::valid::Uri, Base, BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar,
ErrorKind, FileType, Input, InputContent, InputSource, Request, Response, ResponseBody,
Result, Status,
uri::valid::Uri, AcceptRange, AcceptRangeError, AcceptSelector, Base, BasicAuthCredentials,
BasicAuthSelector, CacheStatus, CookieJar, ErrorKind, FileType, Input, InputContent,
InputSource, Request, Response, ResponseBody, Result, Status,
},
};
5 changes: 5 additions & 0 deletions lychee-lib/src/types/accept/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//! Types used by the `accept` option, split across two private submodules.
//!
//! Everything public in `range` and `selector` is re-exported from here, so
//! callers import from this module instead of naming the submodules.
mod range;
mod selector;

pub use range::*;
pub use selector::*;
Loading