From 72c0a2a5c6b70297df310d92731063d954cf59b1 Mon Sep 17 00:00:00 2001 From: Victorien Elvinger Date: Wed, 20 Nov 2024 22:16:13 +0100 Subject: [PATCH 1/3] feat(biome_glob): add dedicated crate for globs --- Cargo.lock | 12 ++ Cargo.toml | 1 + crates/biome_glob/Cargo.toml | 27 +++ .../src/lib.rs} | 171 +++++++++++------- crates/biome_js_analyze/Cargo.toml | 1 + .../src/assists/source/organize_imports.rs | 4 +- .../correctness/no_undeclared_dependencies.rs | 7 +- crates/biome_js_analyze/src/utils.rs | 1 - knope.toml | 4 + 9 files changed, 160 insertions(+), 68 deletions(-) create mode 100644 crates/biome_glob/Cargo.toml rename crates/{biome_js_analyze/src/utils/restricted_glob.rs => biome_glob/src/lib.rs} (75%) diff --git a/Cargo.lock b/Cargo.lock index 3dad933c42c9..befef393073d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -499,6 +499,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "biome_glob" +version = "0.1.0" +dependencies = [ + "biome_deserialize", + "biome_text_size", + "globset", + "schemars", + "serde", +] + [[package]] name = "biome_graphql_analyze" version = "0.0.1" @@ -741,6 +752,7 @@ dependencies = [ "biome_deserialize", "biome_deserialize_macros", "biome_diagnostics", + "biome_glob", "biome_js_factory", "biome_js_parser", "biome_js_semantic", diff --git a/Cargo.toml b/Cargo.toml index 5a7aa249e558..952aa031b727 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -115,6 +115,7 @@ biome_diagnostics_categories = { version = "0.5.7", path = "./crates/biome_diagn biome_diagnostics_macros = { version = "0.5.7", path = "./crates/biome_diagnostics_macros" } biome_formatter = { version = "0.5.7", path = "./crates/biome_formatter" } biome_fs = { version = "0.5.7", path = "./crates/biome_fs" } +biome_glob = { version = "0.1.0", path = "./crates/biome_glob" } biome_graphql_analyze = { version = "0.0.1", path = "./crates/biome_graphql_analyze" } biome_graphql_factory = { version = "0.1.0", path = "./crates/biome_graphql_factory" } biome_graphql_formatter = { version 
= "0.1.0", path = "./crates/biome_graphql_formatter" } diff --git a/crates/biome_glob/Cargo.toml b/crates/biome_glob/Cargo.toml new file mode 100644 index 000000000000..6f10ce4a6c94 --- /dev/null +++ b/crates/biome_glob/Cargo.toml @@ -0,0 +1,27 @@ + +[package] +authors.workspace = true +categories.workspace = true +description = "" +edition.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +name = "biome_glob" +repository.workspace = true +version = "0.1.0" + +[lints] +workspace = true + +[dependencies] +biome_deserialize = { workspace = true, optional = true } +biome_text_size = { workspace = true, optional = true } +globset = { workspace = true } +schemars = { workspace = true, optional = true } +serde = { workspace = true, optional = true } + +[features] +biome_deserialize = ["dep:biome_deserialize", "dep:biome_text_size"] +schemars = ["dep:schemars"] +serde = ["dep:serde"] diff --git a/crates/biome_js_analyze/src/utils/restricted_glob.rs b/crates/biome_glob/src/lib.rs similarity index 75% rename from crates/biome_js_analyze/src/utils/restricted_glob.rs rename to crates/biome_glob/src/lib.rs index c0b0e6e62c0e..ae2fcaa9d059 100644 --- a/crates/biome_js_analyze/src/utils/restricted_glob.rs +++ b/crates/biome_glob/src/lib.rs @@ -1,31 +1,86 @@ -use biome_rowan::{TextRange, TextSize}; +//! biome_glob provides a glob and glob list with exceptions matching. +//! +//! ## Matching a path against a glob +//! +//! You can create a glob from a string using [core::str::FromStr::from_str] or the corresponding method `parse`. +//! A glob can match against anything that can be turned into a [std::path::Path]. +//! This is for example the aqcse of strings. +//! +//! ``` +//! use biome_glob::Glob; +//! +//! let glob = "*.rs".parse::().expect("correct glob"); +//! assert!(glob.is_match("lib.rs")); +//! assert!(!glob.is_match("src/lib.rs")); +//! ``` +//! +//! ## Matching against multiple globs +//! +//! 
When a path is expected to be matched against several globs, +//! you should compile the path into a [CandidatePath]. +//! [CandidatePath] may speed up matching against several globs. +//! +//! ``` +//! use biome_glob::{CandidatePath, Glob}; +//! +//! let globs: &[Glob] = &[ +//! "**/*.rs".parse().expect("correct glob"), +//! "**/*.txt".parse().expect("correct glob"), +//! ]; +//! +//! let path = CandidatePath::new(&"a/path/to/file.txt"); +//! +//! assert!(globs.iter().any(|glob| path.matches(glob))); +//! ``` +//! +//! ## Matching against multiple globs and exceptions +//! +//! biome_glob supports negated globs, which are particularly useful for encoding exceptions. +//! In the following example we accept all files in the `src` dierctory, except the ones ending with the `txt` extension. +//! +//! ``` +//! use biome_glob::{CandidatePath, Glob}; +//! +//! let globs: &[Glob] = &[ +//! "**/*.rs".parse().expect("correct glob"), +//! "!**/*.txt".parse().expect("correct glob"), +//! ]; +//! +//! let path = CandidatePath::new(&"a/path/to/file.txt"); +//! +//! assert!(!path.matches_with_exceptions(globs)); +//! ``` +//! +//! ## Supported syntax +//! +//! A Biome glob pattern supports the following syntaxes: +//! +//! - star `*` that matches zero or more characters inside a path segment +//! - globstar `**` that matches zero or more path segments +//! - Use `\*` to escape `*` +//! - `?`, `[`, `]`, `{`, and `}` must be escaped using `\`. +//! These characters are reserved for future use. +//! - Use `!` as first character to negate the glob +//! +//! A path segment is delimited by path separator `/` or the start/end of the path. +//! -/// A restricted glob pattern only supports the following syntaxes: -/// -/// - star `*` that matches zero or more characters inside a path segment -/// - globstar `**` that matches zero or more path segments -/// - Use `\*` to escape `*` -/// - `?`, `[`, `]`, `{`, and `}` must be escaped using `\`. 
-/// These characters are reserved for future use. -/// - Use `!` as first character to negate the glob -/// -/// A path segment is delimited by path separator `/` or the start/end of the path. -#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] -#[serde(try_from = "String", into = "String")] -pub struct RestrictedGlob { +/// A Biome glob pattern. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +#[cfg_attr(feature = "serde", serde(try_from = "String", into = "String"))] +pub struct Glob { is_negated: bool, glob: globset::GlobMatcher, } -impl RestrictedGlob { +impl Glob { /// Returns `true` if this glob is negated. /// /// ``` - /// use biome_js_analyze::utils::restricted_glob::RestrictedGlob; - /// - /// let glob = "!*.js".parse::().unwrap(); + /// let glob = "!*.js".parse::().unwrap(); /// assert!(glob.is_negated()); /// - /// let glob = "*.js".parse::().unwrap(); + /// let glob = "*.js".parse::().unwrap(); /// assert!(!glob.is_negated()); /// ``` pub fn is_negated(&self) -> bool { @@ -52,31 +107,31 @@ impl RestrictedGlob { self.glob.is_match_candidate(&path.0) } } -impl PartialEq for RestrictedGlob { +impl PartialEq for Glob { fn eq(&self, other: &Self) -> bool { self.is_negated == other.is_negated && self.glob.glob() == other.glob.glob() } } -impl Eq for RestrictedGlob {} -impl std::hash::Hash for RestrictedGlob { +impl Eq for Glob {} +impl std::hash::Hash for Glob { fn hash(&self, state: &mut H) { self.is_negated.hash(state); self.glob.glob().hash(state); } } -impl std::fmt::Display for RestrictedGlob { +impl std::fmt::Display for Glob { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let repr = self.glob.glob(); let negation = if self.is_negated { "!" 
} else { "" }; write!(f, "{negation}{repr}") } } -impl From for String { - fn from(value: RestrictedGlob) -> Self { +impl From for String { + fn from(value: Glob) -> Self { value.to_string() } } -impl std::str::FromStr for RestrictedGlob { +impl std::str::FromStr for Glob { type Err = RestrictedGlobError; fn from_str(value: &str) -> Result { let (is_negated, value) = if let Some(stripped) = value.strip_prefix('!') { @@ -91,7 +146,7 @@ impl std::str::FromStr for RestrictedGlob { // Only `**` can match `/` glob_builder.literal_separator(true); match glob_builder.build() { - Ok(glob) => Ok(RestrictedGlob { + Ok(glob) => Ok(Glob { is_negated, glob: glob.compile_matcher(), }), @@ -101,14 +156,15 @@ impl std::str::FromStr for RestrictedGlob { } } } -impl TryFrom for RestrictedGlob { +impl TryFrom for Glob { type Error = RestrictedGlobError; fn try_from(value: String) -> Result { value.parse() } } // We use a custom impl to precisely report the location of the error. -impl biome_deserialize::Deserializable for RestrictedGlob { +#[cfg(feature = "biome_deserialize")] +impl biome_deserialize::Deserializable for Glob { fn deserialize( value: &impl biome_deserialize::DeserializableValue, name: &str, @@ -120,7 +176,10 @@ impl biome_deserialize::Deserializable for RestrictedGlob { Err(error) => { let range = value.range(); let range = error.index().map_or(range, |index| { - TextRange::at(range.start() + TextSize::from(1 + index), 1u32.into()) + biome_text_size::TextRange::at( + range.start() + biome_text_size::TextSize::from(1 + index), + 1u32.into(), + ) }); diagnostics.push( biome_deserialize::DeserializationDiagnostic::new(format_args!("{error}")) @@ -132,7 +191,7 @@ impl biome_deserialize::Deserializable for RestrictedGlob { } } #[cfg(feature = "schemars")] -impl schemars::JsonSchema for RestrictedGlob { +impl schemars::JsonSchema for Glob { fn schema_name() -> String { "Regex".to_string() } @@ -156,7 +215,7 @@ impl<'a> CandidatePath<'a> { } /// Tests whether the current 
path matches `glob`. - pub fn matches(&self, glob: &RestrictedGlob) -> bool { + pub fn matches(&self, glob: &Glob) -> bool { glob.is_match_candidate(self) } @@ -165,9 +224,9 @@ impl<'a> CandidatePath<'a> { /// Let's take an example: /// /// ``` - /// use biome_js_analyze::utils::restricted_glob::{CandidatePath, RestrictedGlob}; + /// use biome_glob::{CandidatePath, Glob}; /// - /// let globs: &[RestrictedGlob] = &[ + /// let globs: &[Glob] = &[ /// "*".parse().unwrap(), /// "!a*".parse().unwrap(), /// "a".parse().unwrap(), @@ -189,7 +248,7 @@ impl<'a> CandidatePath<'a> { /// pub fn matches_with_exceptions<'b, I>(&self, globs: I) -> bool where - I: IntoIterator, + I: IntoIterator, I::IntoIter: DoubleEndedIterator, { self.matches_with_exceptions_or(false, globs) @@ -203,9 +262,9 @@ impl<'a> CandidatePath<'a> { /// /// /// ``` - /// use biome_js_analyze::utils::restricted_glob::{CandidatePath, RestrictedGlob}; + /// use biome_glob::{CandidatePath, Glob}; /// - /// let globs: &[RestrictedGlob] = &[ + /// let globs: &[Glob] = &[ /// "a/path".parse().unwrap(), /// "!b".parse().unwrap(), /// ]; @@ -222,7 +281,7 @@ impl<'a> CandidatePath<'a> { /// ``` pub fn matches_directory_with_exceptions<'b, I>(&self, globs: I) -> bool where - I: IntoIterator, + I: IntoIterator, I::IntoIter: DoubleEndedIterator, { self.matches_with_exceptions_or(true, globs) @@ -232,7 +291,7 @@ impl<'a> CandidatePath<'a> { /// Returns `default` if there is no globs that match. fn matches_with_exceptions_or<'b, I>(&self, default: bool, globs: I) -> bool where - I: IntoIterator, + I: IntoIterator, I::IntoIter: DoubleEndedIterator, { // Iterate in reverse order to avoid unnecessary glob matching. 
@@ -379,15 +438,9 @@ mod tests { #[test] fn test_restricted_regex() { - assert!(!"*.js" - .parse::() - .unwrap() - .is_match("file/path.js")); + assert!(!"*.js".parse::().unwrap().is_match("file/path.js")); - assert!("**/*.js" - .parse::() - .unwrap() - .is_match("file/path.js")); + assert!("**/*.js".parse::().unwrap().is_match("file/path.js")); } #[test] @@ -395,29 +448,23 @@ mod tests { let a = CandidatePath::new(&"a"); assert!(a.matches_with_exceptions(&[ - RestrictedGlob::from_str("*").unwrap(), - RestrictedGlob::from_str("!b").unwrap(), + Glob::from_str("*").unwrap(), + Glob::from_str("!b").unwrap(), ])); assert!(!a.matches_with_exceptions(&[ - RestrictedGlob::from_str("*").unwrap(), - RestrictedGlob::from_str("!a*").unwrap(), + Glob::from_str("*").unwrap(), + Glob::from_str("!a*").unwrap(), ])); assert!(a.matches_with_exceptions(&[ - RestrictedGlob::from_str("*").unwrap(), - RestrictedGlob::from_str("!a*").unwrap(), - RestrictedGlob::from_str("a").unwrap(), + Glob::from_str("*").unwrap(), + Glob::from_str("!a*").unwrap(), + Glob::from_str("a").unwrap(), ])); } #[test] fn test_to_string() { - assert_eq!( - RestrictedGlob::from_str("**/*.js").unwrap().to_string(), - "**/*.js" - ); - assert_eq!( - RestrictedGlob::from_str("!**/*.js").unwrap().to_string(), - "!**/*.js" - ); + assert_eq!(Glob::from_str("**/*.js").unwrap().to_string(), "**/*.js"); + assert_eq!(Glob::from_str("!**/*.js").unwrap().to_string(), "!**/*.js"); } } diff --git a/crates/biome_js_analyze/Cargo.toml b/crates/biome_js_analyze/Cargo.toml index 3b334405c13c..8415b0138653 100644 --- a/crates/biome_js_analyze/Cargo.toml +++ b/crates/biome_js_analyze/Cargo.toml @@ -19,6 +19,7 @@ biome_control_flow = { workspace = true } biome_deserialize = { workspace = true, features = ["smallvec"] } biome_deserialize_macros = { workspace = true } biome_diagnostics = { workspace = true } +biome_glob = { workspace = true, features = ["biome_deserialize", "schemars", "serde"] } biome_js_factory = { workspace = true 
} biome_js_semantic = { workspace = true } biome_js_syntax = { workspace = true } diff --git a/crates/biome_js_analyze/src/assists/source/organize_imports.rs b/crates/biome_js_analyze/src/assists/source/organize_imports.rs index 412f71d22971..56b83d78bc95 100644 --- a/crates/biome_js_analyze/src/assists/source/organize_imports.rs +++ b/crates/biome_js_analyze/src/assists/source/organize_imports.rs @@ -7,7 +7,7 @@ use biome_deserialize_macros::Deserializable; use biome_js_syntax::JsModule; use biome_rowan::BatchMutationExt; -use crate::{utils::restricted_glob::RestrictedGlob, JsRuleAction}; +use crate::JsRuleAction; pub mod legacy; pub mod util; @@ -94,7 +94,7 @@ pub struct Options { #[serde(untagged)] pub enum ImportGroup { Predefined(PredefinedImportGroup), - Custom(RestrictedGlob), + Custom(biome_glob::Glob), } impl Deserializable for ImportGroup { fn deserialize( diff --git a/crates/biome_js_analyze/src/lint/correctness/no_undeclared_dependencies.rs b/crates/biome_js_analyze/src/lint/correctness/no_undeclared_dependencies.rs index 503e554938b5..b02f4fd35d2b 100644 --- a/crates/biome_js_analyze/src/lint/correctness/no_undeclared_dependencies.rs +++ b/crates/biome_js_analyze/src/lint/correctness/no_undeclared_dependencies.rs @@ -7,7 +7,6 @@ use biome_deserialize_macros::Deserializable; use biome_js_syntax::{AnyJsImportClause, AnyJsImportLike}; use biome_rowan::AstNode; -use crate::utils::restricted_glob::{CandidatePath, RestrictedGlob}; use crate::{globals::is_node_builtin_module, services::manifest::Manifest}; declare_lint_rule! { @@ -95,7 +94,7 @@ enum DependencyAvailability { Bool(bool), /// Dependencies are available in files that matches any of the globs. 
- Patterns(Box<[RestrictedGlob]>), + Patterns(Box<[biome_glob::Glob]>), } impl Default for DependencyAvailability { @@ -166,7 +165,9 @@ impl DependencyAvailability { fn is_available(&self, path: &Path) -> bool { match self { Self::Bool(b) => *b, - Self::Patterns(globs) => CandidatePath::new(&path).matches_with_exceptions(globs), + Self::Patterns(globs) => { + biome_glob::CandidatePath::new(&path).matches_with_exceptions(globs) + } } } } diff --git a/crates/biome_js_analyze/src/utils.rs b/crates/biome_js_analyze/src/utils.rs index 7c1a4693212a..1bd8375a4df1 100644 --- a/crates/biome_js_analyze/src/utils.rs +++ b/crates/biome_js_analyze/src/utils.rs @@ -4,7 +4,6 @@ use std::iter; pub mod batch; pub mod rename; -pub mod restricted_glob; pub mod restricted_regex; #[cfg(test)] pub mod tests; diff --git a/knope.toml b/knope.toml index 71f83cb5e8e0..9779d5b2b2f4 100644 --- a/knope.toml +++ b/knope.toml @@ -215,6 +215,10 @@ versioned_files = ["crates/biome_graphql_semantic/Cargo.toml"] changelog = "crates/biome_css_semantic/CHANGELOG.md" versioned_files = ["crates/biome_css_semantic/Cargo.toml"] +[packages.biome_glob] +changelog = "crates/biome_glob/CHANGELOG.md" +versioned_files = ["crates/biome_glob/Cargo.toml"] + ## End of crates. DO NOT CHANGE! # Workflow to create a changeset From 89d930ef48bc20d729bddc6de2d4ab14047b5482 Mon Sep 17 00:00:00 2001 From: Victorien Elvinger Date: Thu, 21 Nov 2024 14:12:42 +0100 Subject: [PATCH 2/3] Update crates/biome_glob/src/lib.rs Co-authored-by: Emanuele Stoppa --- crates/biome_glob/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/biome_glob/src/lib.rs b/crates/biome_glob/src/lib.rs index ae2fcaa9d059..e8a883b3b99a 100644 --- a/crates/biome_glob/src/lib.rs +++ b/crates/biome_glob/src/lib.rs @@ -1,4 +1,4 @@ -//! biome_glob provides a glob and glob list with exceptions matching. +//! biome_glob provides a globbing functionality. 
When listing the globs to match, it is also possible to provide globs that function as "exceptions" by prefixing the globs with `!`. //! //! ## Matching a path against a glob //! //! From 03e4540d9f58e727e3c3efaee9bd45cf092a5257 Mon Sep 17 00:00:00 2001 From: Victorien Elvinger Date: Thu, 21 Nov 2024 14:13:17 +0100 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Emanuele Stoppa --- crates/biome_glob/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/biome_glob/src/lib.rs b/crates/biome_glob/src/lib.rs index e8a883b3b99a..33fd8b11854a 100644 --- a/crates/biome_glob/src/lib.rs +++ b/crates/biome_glob/src/lib.rs @@ -4,7 +4,7 @@ //! //! You can create a glob from a string using [core::str::FromStr::from_str] or the corresponding method `parse`. //! A glob can match against anything that can be turned into a [std::path::Path]. -//! This is for example the aqcse of strings. +//! This is, for example, the case of strings. //! //! ``` //! use biome_glob::Glob; @@ -36,7 +36,7 @@ //! ## Matching against multiple globs and exceptions //! //! biome_glob supports negated globs, which are particularly useful for encoding exceptions. -//! In the following example we accept all files in the `src` dierctory, except the ones ending with the `txt` extension. +//! In the following example, we accept all files in the `src` directory, except the ones ending with the `txt` extension. //! //! ``` //! use biome_glob::{CandidatePath, Glob};