Skip to content

Commit

Permalink
fix: handle more edge cases
Browse files: browse the repository at this point in the history
  • Loading branch information
anonrig committed Aug 25, 2023
1 parent be0cfd2 commit 9f90a93
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 27 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion lychee-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ repository = "https://github.com/lycheeverse/lychee"
version = "0.13.0"

[dependencies]
ada-url = { version = "1.4.0", features = ["serde"] }
ada-url = { version = "1.4.1", features = ["serde"] }
async-stream = "0.3.5"
cached = "0.44.0"
check-if-email-exists = { version = "0.9.0", optional = true }
Expand Down
4 changes: 1 addition & 3 deletions lychee-lib/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -674,9 +674,7 @@ impl Client {

/// Check a `file` URI.
pub async fn check_file(&self, uri: &Uri) -> Status {
let Ok(path) = uri.url.to_file_path() else {
return ErrorKind::InvalidFilePath(uri.clone()).into();
};
let path = Path::new(uri.url.href());
if !path.exists() {
return ErrorKind::InvalidFilePath(uri.clone()).into();
}
Expand Down
2 changes: 1 addition & 1 deletion lychee-lib/src/types/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ pub enum ErrorKind {

/// The given string can not be parsed into a valid URL, e-mail address, or file path
#[error("Cannot parse string `{1}` as website url: {0}")]
ParseUrl(#[source] ada_url::Error, String),
ParseUrl(String, String),

/// The given URI cannot be converted to a file path
#[error("Cannot find file")]
Expand Down
2 changes: 1 addition & 1 deletion lychee-lib/src/types/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ impl Input {
// Curl also uses http (i.e. not https), see
// https://github.com/curl/curl/blob/70ac27604a2abfa809a7b2736506af0da8c3c8a9/lib/urlapi.c#L1104-L1124
let url = Url::parse(&format!("http://{value}")).map_err(|e| {
ErrorKind::ParseUrl(e, "Input is not a valid URL".to_string())
ErrorKind::ParseUrl(e.to_string(), "Input is not a valid URL".to_string())
})?;
InputSource::RemoteUrl(Box::new(url))
}
Expand Down
2 changes: 1 addition & 1 deletion lychee-lib/src/types/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl TryFrom<Uri> for Request {
fn try_from(uri: Uri) -> Result<Self, Self::Error> {
Ok(Request::new(
uri.clone(),
InputSource::RemoteUrl(Box::new(uri.url)),
InputSource::String(uri.url.href().to_string()),
None,
None,
None,
Expand Down
21 changes: 13 additions & 8 deletions lychee-lib/src/types/uri/valid.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{convert::TryFrom, fmt::Display, net::IpAddr};

use ada_url::Url;
use ada_url::{HostType, Url};
use email_address::EmailAddress;
use ip_network::Ipv6Network;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -28,34 +28,39 @@ impl Uri {
#[inline]
#[must_use]
pub fn as_str(&self) -> &str {
self.url.as_ref().trim_start_matches("mailto:")
self.url.href().trim_start_matches("mailto:")
}

#[inline]
#[must_use]
/// Returns the scheme of the URI (e.g. `http` or `mailto`)
pub fn scheme(&self) -> &str {
self.url.scheme()
self.url.protocol()
}

#[inline]
/// Changes this URL's scheme.
pub(crate) fn set_scheme(&mut self, scheme: &str) -> std::result::Result<(), ()> {
self.url.set_scheme(scheme)
self.url.set_protocol(scheme);
Ok(())
}

#[inline]
#[must_use]
/// Returns the domain of the URI (e.g. `example.com`)
pub fn domain(&self) -> Option<&str> {
self.url.domain()
if self.url.host_type() == HostType::Domain {
Some(self.url.hostname())
} else {
None
}
}

#[inline]
#[must_use]
/// Returns the path of the URI (e.g. `/path/to/resource`)
pub fn path(&self) -> &str {
self.url.path()
self.url.pathname()
}

#[inline]
Expand All @@ -66,7 +71,7 @@ impl Uri {
///
/// Return `None` for cannot-be-a-base URLs.
pub fn path_segments(&self) -> Option<std::str::Split<char>> {
self.url.path_segments()
Some(self.url.pathname().split('/'))
}

#[must_use]
Expand Down Expand Up @@ -278,7 +283,7 @@ impl TryFrom<&str> for Uri {
};

// We do not handle relative URLs here, as we do not know the base URL.
Err(ErrorKind::ParseUrl(err, s.to_owned()))
Err(ErrorKind::ParseUrl(err.to_string(), s.to_owned()))
}
}
}
Expand Down
5 changes: 3 additions & 2 deletions lychee-lib/src/utils/fragment_checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,10 @@ impl FragmentChecker {
///
/// In all other cases, returns true.
pub(crate) async fn check(&self, path: &Path, url: &ada_url::Url) -> Result<bool> {
let Some(fragment) = url.fragment() else {
if !url.has_hash() {
return Ok(true);
};
}
let fragment = url.hash();
let url_without_frag = Self::remove_fragment(url.clone());

let extractor = match FileType::from(path) {
Expand Down
20 changes: 12 additions & 8 deletions lychee-lib/src/utils/request.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use ada_url::Url;
use log::info;
use percent_encoding::percent_decode_str;
use reqwest::Url;
use std::{
collections::HashSet,
path::{Path, PathBuf},
Expand Down Expand Up @@ -123,11 +123,15 @@ pub(crate) fn create(
Ok(HashSet::from_iter(requests))
}

fn construct_url(base: &Option<Url>, text: &str) -> Option<Result<Url>> {
base.as_ref().map(|base| {
base.join(text)
.map_err(|e| ErrorKind::ParseUrl(e, format!("{base}{text}")))
})
fn construct_url(base: &Option<ada_url::Url>, text: &str) -> Option<Result<Url>> {
Some(
Url::parse(text, base.and_then(|b| Some(b.href())).or_else(|| None)).map_err(|e| {
ErrorKind::ParseUrl(
e.to_string(),
format!("{0}{text}", base.and_then(|b| Some(b.href())).unwrap_or("")),
)
}),
)
}

fn create_uri_from_path(
Expand All @@ -148,9 +152,9 @@ fn create_uri_from_path(
let decoded = percent_decode_str(dst).decode_utf8()?;
let resolved = path::resolve(src, &PathBuf::from(&*decoded), base)?;
match resolved {
Some(path) => Url::parse(path.to_str().unwrap(), Some("file://"))
Some(path) => ada_url::Url::parse(path.to_str().unwrap(), Some("file://"))
.map(|mut url| {
url.set_fragment(frag);
url.set_hash(frag.unwrap_or(""));
url
})
.map(Some)
Expand Down

0 comments on commit 9f90a93

Please sign in to comment.