From 9497bf01933ff64cc8e9252117d02144c77063d2 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 17 Jul 2023 13:15:41 +0200 Subject: [PATCH] Update docs --- lychee-lib/src/client.rs | 4 ++-- lychee-lib/src/extract/markdown.rs | 8 ++++--- lychee-lib/src/utils/fragment_checker.rs | 29 +++++++++++++++++++----- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index d9d62ced22..b887235159 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -388,7 +388,7 @@ impl ClientBuilder { require_https: self.require_https, quirks, include_fragments: self.include_fragments, - fragment_checker: Default::default(), + fragment_checker: FragmentChecker::new(), }) } } @@ -687,7 +687,7 @@ impl Client { /// Checks a `file` URI's fragment. pub async fn check_fragment(&self, path: &Path, uri: &Uri) -> Status { - match self.fragment_checker.check(path, uri).await { + match self.fragment_checker.check(path, &uri.url).await { Ok(true) => Status::Ok(StatusCode::OK), Ok(false) => ErrorKind::InvalidFragment(uri.clone()).into(), Err(err) => { diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index 82bd243b9f..b5a0a751e7 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -99,11 +99,13 @@ pub(crate) fn extract_markdown_fragments(input: &str) -> HashSet { out.insert(frag.to_string()); } - let id = id_generator.generate(&mut heading); - out.insert(id); + if !heading.is_empty() { + let id = id_generator.generate(&mut heading); + out.insert(id); + heading.clear(); + } in_heading = false; - heading.clear(); } Event::Text(text) => { if in_heading { diff --git a/lychee-lib/src/utils/fragment_checker.rs b/lychee-lib/src/utils/fragment_checker.rs index 13ddc72748..5b394c32b2 100644 --- a/lychee-lib/src/utils/fragment_checker.rs +++ b/lychee-lib/src/utils/fragment_checker.rs @@ -4,21 +4,39 @@ use std::{ sync::Arc, }; -use 
crate::{extract::markdown::extract_markdown_fragments, types::FileType, Result, Uri}; +use crate::{extract::markdown::extract_markdown_fragments, types::FileType, Result}; use tokio::{fs, sync::Mutex}; use url::Url; +/// Holds a cache of fragments for a given URL. +/// +/// Fragments, also known as anchors, are used to link to a specific +/// part of a page. For example, the URL `https://example.com#foo` +/// will link to the element with the `id` of `foo`. +/// +/// This cache is used to avoid having to re-parse the same file +/// multiple times when checking if a given URL contains a fragment. +/// +/// The cache is stored in a `HashMap` with the URL as the key and +/// a `HashSet` of fragments as the value. #[derive(Default, Clone, Debug)] pub(crate) struct FragmentChecker { cache: Arc<Mutex<HashMap<String, HashSet<String>>>>, } impl FragmentChecker { + /// Creates a new `FragmentChecker`. + pub(crate) fn new() -> Self { + Self { + cache: Default::default(), + } + } + /// Checks if the given path contains the given fragment. - pub(crate) async fn check(&self, path: &Path, uri: &Uri) -> Result<bool> { - match (FileType::from(path), uri.url.fragment()) { + pub(crate) async fn check(&self, path: &Path, url: &Url) -> Result<bool> { + match (FileType::from(path), url.fragment()) { (FileType::Markdown, Some(fragment)) => { - let url_without_frag = Self::remove_fragment(uri.url.clone()); + let url_without_frag = Self::remove_fragment(url.clone()); self.populate_cache_if_vacant(url_without_frag, path, fragment) .await } @@ -26,8 +44,7 @@ impl FragmentChecker { } } - fn remove_fragment(url: Url) -> String { - let mut url = url; + fn remove_fragment(mut url: Url) -> String { url.set_fragment(None); url.into() }