diff --git a/frontends/search/src/components/ResultsPage.tsx b/frontends/search/src/components/ResultsPage.tsx
index 370cd69e44..e503a160f5 100644
--- a/frontends/search/src/components/ResultsPage.tsx
+++ b/frontends/search/src/components/ResultsPage.tsx
@@ -294,6 +294,8 @@ const ResultsPage = (props: ResultsPageProps) => {
max: props.search.debounced.twoTypoWordRangeMax,
},
disable_on_word: props.search.debounced.disableOnWords,
+ prioritize_domain_specifc_words:
+ props.search.debounced.prioritize_domain_specifc_words,
},
highlight_options: {
highlight_results: props.search.debounced.highlightResults ?? true,
diff --git a/frontends/search/src/components/SearchForm.tsx b/frontends/search/src/components/SearchForm.tsx
index 055b20c134..e7b6ee504b 100644
--- a/frontends/search/src/components/SearchForm.tsx
+++ b/frontends/search/src/components/SearchForm.tsx
@@ -1058,6 +1058,7 @@ const SearchForm = (props: {
twoTypoWordRangeMax: null,
disableOnWords: [],
typoTolerance: false,
+ prioritize_domain_specifc_words: true,
highlightResults: true,
highlightDelimiters: ["?", ".", "!"],
highlightMaxLength: 8,
@@ -1219,6 +1220,26 @@ const SearchForm = (props: {
}}
/>
+
diff --git a/frontends/search/src/hooks/useSearch.ts b/frontends/search/src/hooks/useSearch.ts
--- a/frontends/search/src/hooks/useSearch.ts
+++ b/frontends/search/src/hooks/useSearch.ts
@@ -151,6 +153,8 @@ const fromStateToParams = (state: SearchOptions) => {
oneTypoWordRangeMax: state.oneTypoWordRangeMax?.toString() ?? "8",
twoTypoWordRangeMin: state.twoTypoWordRangeMin.toString(),
twoTypoWordRangeMax: state.twoTypoWordRangeMax?.toString() ?? "",
+ prioritize_domain_specifc_words:
+ state.prioritize_domain_specifc_words?.toString() ?? "",
disableOnWords: state.disableOnWords.join(","),
highlightStrategy: state.highlightStrategy,
highlightResults: state.highlightResults.toString(),
@@ -184,6 +188,8 @@ const fromParamsToState = (
oneTypoWordRangeMax: parseIntOrNull(params.oneTypoWordRangeMax),
twoTypoWordRangeMin: parseInt(params.twoTypoWordRangeMin ?? "8"),
twoTypoWordRangeMax: parseIntOrNull(params.twoTypoWordRangeMax),
+ prioritize_domain_specifc_words:
+ (params.prioritize_domain_specifc_words ?? "true") === "true",
disableOnWords: params.disableOnWords?.split(",") ?? [],
highlightResults: (params.highlightResults ?? "true") === "true",
highlightStrategy: isHighlightStrategy(params.highlightStrategy)
diff --git a/server/Cargo.lock b/server/Cargo.lock
index 1057137923..af4b2f8097 100644
--- a/server/Cargo.lock
+++ b/server/Cargo.lock
@@ -731,12 +731,6 @@ dependencies = [
"serde",
]
-[[package]]
-name = "bit-vec"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22"
-
[[package]]
name = "bitflags"
version = "1.3.2"
@@ -758,15 +752,6 @@ dependencies = [
"crunchy",
]
-[[package]]
-name = "bktree"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0bb1e744816f6a3b9e962186091867f3e5959d4dac995777ec254631cb00b21c"
-dependencies = [
- "num",
-]
-
[[package]]
name = "blake2b_simd"
version = "1.0.2"
@@ -800,17 +785,6 @@ dependencies = [
"generic-array",
]
-[[package]]
-name = "bloomfilter"
-version = "1.0.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc0bdbcf2078e0ba8a74e1fe0cf36f54054a04485759b61dfd60b174658e9607"
-dependencies = [
- "bit-vec",
- "getrandom 0.2.15",
- "siphasher 1.0.1",
-]
-
[[package]]
name = "brotli"
version = "6.0.0"
@@ -2966,30 +2940,6 @@ dependencies = [
"winapi",
]
-[[package]]
-name = "num"
-version = "0.4.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
-dependencies = [
- "num-bigint",
- "num-complex",
- "num-integer",
- "num-iter",
- "num-rational",
- "num-traits",
-]
-
-[[package]]
-name = "num-bigint"
-version = "0.4.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
-dependencies = [
- "num-integer",
- "num-traits",
-]
-
[[package]]
name = "num-bigint-dig"
version = "0.8.4"
@@ -3042,17 +2992,6 @@ dependencies = [
"num-traits",
]
-[[package]]
-name = "num-rational"
-version = "0.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
-dependencies = [
- "num-bigint",
- "num-integer",
- "num-traits",
-]
-
[[package]]
name = "num-traits"
version = "0.2.19"
@@ -3422,7 +3361,7 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
- "siphasher 0.3.11",
+ "siphasher",
]
[[package]]
@@ -3431,7 +3370,7 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b"
dependencies = [
- "siphasher 0.3.11",
+ "siphasher",
]
[[package]]
@@ -4953,12 +4892,6 @@ version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
-[[package]]
-name = "siphasher"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
-
[[package]]
name = "sketches-ddsketch"
version = "0.2.2"
@@ -5791,9 +5724,7 @@ dependencies = [
"base64 0.22.1",
"bb8-redis",
"bincode",
- "bktree",
"blake3",
- "bloomfilter",
"cfg-if",
"chm",
"chrono",
diff --git a/server/Cargo.toml b/server/Cargo.toml
index b45d3692a5..9b2baed702 100644
--- a/server/Cargo.toml
+++ b/server/Cargo.toml
@@ -162,12 +162,10 @@ murmur3 = "0.5.2"
tantivy = "0.22.0"
strsim = "0.11.1"
levenshtein_automata = "0.2.1"
-bktree = "1.0.1"
flate2 = "1.0.31"
bincode = "1.3"
rayon = "1.10.0"
crossbeam = "0.8.4"
-bloomfilter = "1.0.14"
dashmap = "6.0.1"
diff --git a/server/src/data/models.rs b/server/src/data/models.rs
index 347863ee90..713113c1da 100644
--- a/server/src/data/models.rs
+++ b/server/src/data/models.rs
@@ -5296,6 +5296,8 @@ pub struct TypoOptions {
pub two_typo_word_range: Option<TypoRange>,
/// Words that should not be corrected. If not specified, this defaults to an empty list.
pub disable_on_word: Option<Vec<String>>,
+ /// Auto-require non-English words present in the dataset to exist in each result's chunk_html text. If not specified, this defaults to true.
+ pub prioritize_domain_specifc_words: Option<bool>,
}
#[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default)]
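Note on the API shape: the new flag is optional on the wire, and the typo operator reads it with `unwrap_or(true)`, so omitting it keeps the behavior enabled. A minimal sketch with a stand-in struct (not the real `TypoOptions`, which carries more fields; the "specifc" spelling intentionally mirrors the field name in the API):

    use serde::Deserialize;

    // Stand-in for TypoOptions; only the new field is modeled.
    #[derive(Deserialize)]
    struct TypoOptionsSketch {
        prioritize_domain_specifc_words: Option<bool>,
    }

    fn main() {
        // Omitted entirely -> None -> treated as enabled via unwrap_or(true).
        let missing: TypoOptionsSketch = serde_json::from_str("{}").unwrap();
        assert!(missing.prioritize_domain_specifc_words.unwrap_or(true));

        // Explicit false opts out of the quoting behavior.
        let disabled: TypoOptionsSketch =
            serde_json::from_str(r#"{"prioritize_domain_specifc_words":false}"#).unwrap();
        assert!(!disabled.prioritize_domain_specifc_words.unwrap_or(true));
    }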
diff --git a/server/src/operators/search_operator.rs b/server/src/operators/search_operator.rs
index 041ed91a42..fb9cfa4bee 100644
--- a/server/src/operators/search_operator.rs
+++ b/server/src/operators/search_operator.rs
@@ -1838,17 +1838,24 @@ pub async fn search_chunks_query(
timer.add("start correcting query");
match parsed_query {
ParsedQueryTypes::Single(ref mut query) => {
- corrected_query =
- correct_query(query.query.clone(), dataset.id, redis_pool, options).await?;
- query.query = corrected_query.clone().unwrap_or(query.query.clone());
+ let typo_corrected_query =
+ correct_query(query.clone(), dataset.id, redis_pool, options).await?;
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ *query = typo_corrected_query.query.clone().unwrap_or(query.clone());
data.query = QueryTypes::Single(query.query.clone());
}
ParsedQueryTypes::Multi(ref mut queries) => {
for (query, _) in queries {
- corrected_query =
- correct_query(query.query.clone(), dataset.id, redis_pool.clone(), options)
+ let typo_corrected_query =
+ correct_query(query.clone(), dataset.id, redis_pool.clone(), options)
.await?;
- query.query = corrected_query.clone().unwrap_or(query.query.clone());
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ *query = typo_corrected_query.query.clone().unwrap_or(query.clone());
}
}
}
@@ -1953,7 +1960,7 @@ pub async fn search_chunks_query(
timer.add("reranking");
transaction.finish();
- result_chunks.corrected_query = corrected_query;
+ result_chunks.corrected_query = corrected_query.map(|c| c.query);
Ok(result_chunks)
}
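Each search path in this file repeats the same three steps: run `correct_query`, record `corrected_query` only when a correction was actually applied, then adopt the (possibly quote-augmented) query. A hypothetical consolidation, written against the types already in scope in search_operator.rs (not part of this patch):

    // Hypothetical helper capturing the repeated correction pattern.
    async fn apply_typo_correction(
        query: &mut ParsedQuery,
        corrected_query: &mut Option<ParsedQuery>,
        dataset_id: uuid::Uuid,
        redis_pool: web::Data<RedisPool>,
        options: &TypoOptions,
    ) -> Result<(), ServiceError> {
        let result = correct_query(query.clone(), dataset_id, redis_pool, options).await?;
        if result.corrected {
            // Surface a correction to the caller only when the text changed.
            corrected_query.clone_from(&result.query);
        }
        if let Some(new_query) = result.query {
            *query = new_query;
        }
        Ok(())
    }

Both the Single and Multi branches could then call `apply_typo_correction(query, &mut corrected_query, dataset.id, redis_pool.clone(), options).await?;`.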
@@ -1986,13 +1993,16 @@ pub async fn search_hybrid_chunks(
if let Some(options) = &data.typo_options {
timer.add("start correcting query");
- corrected_query =
- correct_query(parsed_query.query.clone(), dataset.id, redis_pool, options).await?;
- parsed_query.query = corrected_query
+ let typo_corrected_query =
+ correct_query(parsed_query.clone(), dataset.id, redis_pool, options).await?;
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ parsed_query = typo_corrected_query
+ .query
.clone()
- .unwrap_or(parsed_query.query.clone());
+ .unwrap_or(parsed_query.clone());
data.query = QueryTypes::Single(parsed_query.query.clone());
-
timer.add("corrected query");
}
@@ -2123,7 +2133,7 @@ pub async fn search_hybrid_chunks(
SearchChunkQueryResponseBody {
score_chunks: reranked_chunks,
- corrected_query,
+ corrected_query: corrected_query.map(|c| c.query),
total_chunk_pages: result_chunks.total_chunk_pages,
}
};
@@ -2178,17 +2188,23 @@ pub async fn search_groups_query(
timer.add("start correcting query");
match parsed_query {
ParsedQueryTypes::Single(ref mut query) => {
- corrected_query =
- correct_query(query.query.clone(), dataset.id, redis_pool, options).await?;
- query.query = corrected_query.clone().unwrap_or(query.query.clone());
+ let typo_corrected_query =
+ correct_query(query.clone(), dataset.id, redis_pool.clone(), options).await?;
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ *query = typo_corrected_query.query.clone().unwrap_or(query.clone());
data.query = QueryTypes::Single(query.query.clone());
}
ParsedQueryTypes::Multi(ref mut queries) => {
for (query, _) in queries {
- corrected_query =
- correct_query(query.query.clone(), dataset.id, redis_pool.clone(), options)
+ let typo_corrected_query =
+ correct_query(query.clone(), dataset.id, redis_pool.clone(), options)
.await?;
- query.query = corrected_query.clone().unwrap_or(query.query.clone());
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ *query = typo_corrected_query.query.clone().unwrap_or(query.clone());
}
}
}
@@ -2279,7 +2295,7 @@ pub async fn search_groups_query(
Ok(SearchWithinGroupResults {
bookmarks: result_chunks.score_chunks,
group,
- corrected_query,
+ corrected_query: corrected_query.map(|c| c.query),
total_pages: result_chunks.total_chunk_pages,
})
}
@@ -2303,13 +2319,16 @@ pub async fn search_hybrid_groups(
if let Some(options) = &data.typo_options {
timer.add("start correcting query");
- corrected_query =
- correct_query(parsed_query.query.clone(), dataset.id, redis_pool, options).await?;
- parsed_query.query = corrected_query
+ let typo_corrected_query =
+ correct_query(parsed_query.clone(), dataset.id, redis_pool, options).await?;
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ parsed_query = typo_corrected_query
+ .query
.clone()
- .unwrap_or(parsed_query.query.clone());
+ .unwrap_or(parsed_query.clone());
data.query = QueryTypes::Single(parsed_query.query.clone());
-
timer.add("corrected query");
}
@@ -2472,7 +2491,7 @@ pub async fn search_hybrid_groups(
Ok(SearchWithinGroupResults {
bookmarks: reranked_chunks.score_chunks,
group,
- corrected_query,
+ corrected_query: corrected_query.map(|c| c.query),
total_pages: result_chunks.total_chunk_pages,
})
}
@@ -2496,17 +2515,23 @@ pub async fn semantic_search_over_groups(
timer.add("start correcting query");
match parsed_query {
ParsedQueryTypes::Single(ref mut query) => {
- corrected_query =
- correct_query(query.query.clone(), dataset.id, redis_pool, options).await?;
- query.query = corrected_query.clone().unwrap_or(query.query.clone());
+ let typo_corrected_query =
+ correct_query(query.clone(), dataset.id, redis_pool.clone(), options).await?;
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ *query = typo_corrected_query.query.clone().unwrap_or(query.clone());
data.query = QueryTypes::Single(query.query.clone());
}
ParsedQueryTypes::Multi(ref mut queries) => {
for (query, _) in queries {
- corrected_query =
- correct_query(query.query.clone(), dataset.id, redis_pool.clone(), options)
+ let typo_corrected_query =
+ correct_query(query.clone(), dataset.id, redis_pool.clone(), options)
.await?;
- query.query = corrected_query.clone().unwrap_or(query.query.clone());
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ *query = typo_corrected_query.query.clone().unwrap_or(query.clone());
}
}
}
@@ -2564,7 +2589,7 @@ pub async fn semantic_search_over_groups(
timer.add("fetched from postgres");
//TODO: rerank for groups
- result_chunks.corrected_query = corrected_query;
+ result_chunks.corrected_query = corrected_query.map(|c| c.query);
Ok(result_chunks)
}
@@ -2598,17 +2623,23 @@ pub async fn full_text_search_over_groups(
timer.add("start correcting query");
match parsed_query {
ParsedQueryTypes::Single(ref mut query) => {
- corrected_query =
- correct_query(query.query.clone(), dataset.id, redis_pool, options).await?;
- query.query = corrected_query.clone().unwrap_or(query.query.clone());
+ let typo_corrected_query =
+ correct_query(query.clone(), dataset.id, redis_pool.clone(), options).await?;
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ *query = typo_corrected_query.query.clone().unwrap_or(query.clone());
data.query = QueryTypes::Single(query.query.clone());
}
ParsedQueryTypes::Multi(ref mut queries) => {
for (query, _) in queries {
- corrected_query =
- correct_query(query.query.clone(), dataset.id, redis_pool.clone(), options)
+ let typo_corrected_query =
+ correct_query(query.clone(), dataset.id, redis_pool.clone(), options)
.await?;
- query.query = corrected_query.clone().unwrap_or(query.query.clone());
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ *query = typo_corrected_query.query.clone().unwrap_or(query.clone());
}
}
}
@@ -2654,7 +2685,7 @@ pub async fn full_text_search_over_groups(
timer.add("fetched from postgres");
//TODO: rerank for groups
- result_groups_with_chunk_hits.corrected_query = corrected_query;
+ result_groups_with_chunk_hits.corrected_query = corrected_query.map(|c| c.query);
Ok(result_groups_with_chunk_hits)
}
@@ -2736,13 +2767,16 @@ pub async fn hybrid_search_over_groups(
if let Some(options) = &data.typo_options {
timer.add("start correcting query");
- corrected_query =
- correct_query(parsed_query.query.clone(), dataset.id, redis_pool, options).await?;
- parsed_query.query = corrected_query
+ let typo_corrected_query =
+ correct_query(parsed_query.clone(), dataset.id, redis_pool, options).await?;
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ parsed_query = typo_corrected_query
+ .query
.clone()
- .unwrap_or(parsed_query.query.clone());
+ .unwrap_or(parsed_query.clone());
data.query = QueryTypes::Single(parsed_query.query.clone());
-
timer.add("corrected query");
}
@@ -2873,7 +2907,7 @@ pub async fn hybrid_search_over_groups(
let result_chunks = DeprecatedSearchOverGroupsResponseBody {
group_chunks: reranked_chunks,
- corrected_query,
+ corrected_query: corrected_query.map(|c| c.query),
total_chunk_pages: combined_search_chunk_query_results.total_chunk_pages,
};
@@ -2899,13 +2933,16 @@ pub async fn autocomplete_chunks_query(
if let Some(options) = &data.typo_options {
timer.add("start correcting query");
- corrected_query =
- correct_query(parsed_query.query.clone(), dataset.id, redis_pool, options).await?;
- parsed_query.query = corrected_query
+ let typo_corrected_query =
+ correct_query(parsed_query.clone(), dataset.id, redis_pool, options).await?;
+ if typo_corrected_query.corrected {
+ corrected_query.clone_from(&typo_corrected_query.query);
+ }
+ parsed_query = typo_corrected_query
+ .query
.clone()
- .unwrap_or(parsed_query.query.clone());
+ .unwrap_or(parsed_query.clone());
data.query.clone_from(&parsed_query.query);
-
timer.add("corrected query");
}
@@ -3045,7 +3082,7 @@ pub async fn autocomplete_chunks_query(
timer.add("reranking");
transaction.finish();
- result_chunks.corrected_query = corrected_query;
+ result_chunks.corrected_query = corrected_query.map(|c| c.query);
Ok(result_chunks)
}
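Since `corrected_query` now carries a full `ParsedQuery` instead of a `String`, every response site above narrows it with `.map(|c| c.query)`. A small self-contained illustration of what that mapping keeps and drops (stand-in struct, not the real type):

    // Stand-in for ParsedQuery: responses expose only the corrected text,
    // not the quote_words bookkeeping added during correction.
    struct ParsedQuerySketch {
        query: String,
        #[allow(dead_code)]
        quote_words: Option<Vec<String>>,
    }

    fn response_text(corrected: Option<ParsedQuerySketch>) -> Option<String> {
        corrected.map(|c| c.query)
    }

    fn main() {
        let corrected = Some(ParsedQuerySketch {
            query: "\"qdrant\" vector search".to_string(),
            quote_words: Some(vec!["qdrant".to_string()]),
        });
        assert_eq!(response_text(corrected).as_deref(), Some("\"qdrant\" vector search"));
    }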
diff --git a/server/src/operators/typo_operator.rs b/server/src/operators/typo_operator.rs
index e7d7ca42a0..db20cc954b 100644
--- a/server/src/operators/typo_operator.rs
+++ b/server/src/operators/typo_operator.rs
@@ -9,9 +9,9 @@ use std::{
use crate::{
data::models::{RedisPool, TypoOptions, TypoRange},
errors::ServiceError,
+ handlers::chunk_handler::ParsedQuery,
};
use actix_web::web;
-use bloomfilter::Bloom;
use dashmap::DashMap;
use flate2::{
write::{GzDecoder, GzEncoder},
@@ -22,9 +22,6 @@ use rayon::prelude::*;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::collections::VecDeque;
-const BLOOM_SIZE: usize = 10_000_000; // 10 million bits
-const BLOOM_FP_RATE: f64 = 0.01; // 1% false positive rate
-
#[derive(Clone, Debug, Eq, PartialEq)]
struct Node {
word: String,
@@ -425,24 +422,26 @@ pub struct BKTreeCache {
lazy_static! {
static ref BKTREE_CACHE: BKTreeCache = BKTreeCache::new();
- static ref ENGLISH_WORDS: Bloom<String> = {
- let words = include_str!("../words.txt");
- let mut bloom = Bloom::new_for_fp_rate(BLOOM_SIZE, BLOOM_FP_RATE);
- for word in words.lines() {
- bloom.set(&word.to_lowercase());
- }
- bloom
+ static ref ENGLISH_WORDS: HashSet<String> = {
+ include_str!("../words.txt")
+ .lines()
+ .map(|s| s.to_lowercase())
+ .collect()
};
static ref PREFIX_TRIE: Trie = {
let prefixes = vec![
- "un", "re", "in", "im", "il", "ir", "dis", "en", "em", "non", "pre", "pro", "anti",
+ "anti", "auto", "de", "dis", "down", "extra", "hyper", "il", "im", "in", "ir", "inter",
+ "mega", "mid", "mis", "non", "over", "out", "post", "pre", "pro", "re", "semi", "sub",
+ "super", "tele", "trans", "ultra", "un", "under", "up",
];
Trie::new(&prefixes)
};
static ref SUFFIX_TRIE: Trie = {
let suffixes = vec![
- "ing", "ed", "er", "est", "ly", "ity", "y", "ous", "ful", "less", "ness", "ion",
- "tion", "ation", "able", "ible", "al", "ial", "ive", "ative", "itive",
+ "able", "al", "ance", "ation", "ative", "ed", "en", "ence", "ent", "er", "es", "est",
+ "ful", "ian", "ible", "ic", "ing", "ion", "ious", "ise", "ish", "ism", "ist", "ity",
+ "ive", "ize", "less", "ly", "ment", "ness", "or", "ous", "s", "sion", "tion", "ty",
+ "y",
];
Trie::new(&suffixes)
};
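Swapping the Bloom filter for a `HashSet<String>` makes the English-dictionary check exact: the old filter's ~1% false-positive rate could misclassify a domain-specific term as English and leave it subject to "correction". A self-contained sketch of the new membership check (tiny inline word list standing in for words.txt):

    use std::collections::HashSet;

    // Exact dictionary membership, built the same way as the lazy_static above.
    fn build_dictionary(words: &str) -> HashSet<String> {
        words.lines().map(|w| w.to_lowercase()).collect()
    }

    fn main() {
        let dict = build_dictionary("Apple\nbanana");
        assert!(dict.contains("apple"));   // case-folded hit
        assert!(!dict.contains("qdrant")); // exact negative: no false positives
    }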
@@ -567,9 +566,15 @@ impl BKTreeCache {
}
}
-fn correct_query_helper(tree: &BkTree, query: String, options: &TypoOptions) -> Option<String> {
- let query_words: Vec<&str> = query.split_whitespace().collect();
+fn correct_query_helper(
+ tree: &BkTree,
+ mut query: ParsedQuery,
+ options: &TypoOptions,
+) -> CorrectedQuery {
+ let query_words: Vec<&str> = query.query.split_whitespace().collect();
let mut corrections = HashMap::new();
+ let mut new_quote_words = Vec::new();
+
let excluded_words: HashSet<_> = options
.disable_on_word
.clone()
@@ -606,6 +611,16 @@ fn correct_query_helper(tree: &BkTree, query: String, options: &TypoOptions) ->
}
if !tree.find(word.to_string(), 0).is_empty() {
+ if options.prioritize_domain_specifc_words.unwrap_or(true) {
+ new_quote_words.push(word);
+ query.quote_words = match query.quote_words {
+ Some(mut existing_words) => {
+ existing_words.push(word.to_string());
+ Some(existing_words)
+ }
+ None => Some(vec![word.to_string()]),
+ };
+ }
continue;
}
@@ -649,14 +664,27 @@ fn correct_query_helper(tree: &BkTree, query: String, options: &TypoOptions) ->
}
}
- if corrections.is_empty() {
- None
+ if corrections.is_empty() && new_quote_words.is_empty() {
+ CorrectedQuery {
+ query: Some(query),
+ corrected: false,
+ }
} else {
- let mut corrected_query = query.to_string();
+ let mut corrected_query = query.query.clone();
+
for (original, correction) in corrections {
corrected_query = corrected_query.replace(original, &correction);
}
- Some(corrected_query)
+
+ for word in new_quote_words {
+ corrected_query = corrected_query.replace(word, &format!("\"{}\"", word));
+ }
+
+ query.query = corrected_query;
+ CorrectedQuery {
+ query: Some(query),
+ corrected: true,
+ }
}
}
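The quoting step above wraps any word found verbatim in the dataset's BK-tree (a distance-0 hit) so downstream parsing treats it as a required term. A standalone sketch of the transformation; note that `str::replace` matches substrings, so a domain word occurring inside a longer word would be quoted there as well:

    // Mirror of the replace loop: wrap dataset-specific words in quotes.
    fn quote_domain_words(query: &str, domain_words: &[&str]) -> String {
        let mut out = query.to_string();
        for word in domain_words {
            out = out.replace(word, &format!("\"{}\"", word));
        }
        out
    }

    fn main() {
        assert_eq!(
            quote_domain_words("qdrant vector search", &["qdrant"]),
            "\"qdrant\" vector search"
        );
    }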
@@ -684,20 +712,20 @@ fn is_best_correction(word: &str, correction: &str) -> bool {
}
fn is_likely_english_word(word: &str) -> bool {
- if ENGLISH_WORDS.check(&word.to_lowercase()) {
+ if ENGLISH_WORDS.contains(&word.to_lowercase()) {
return true;
}
// Check for prefix
if let Some(prefix_len) = PREFIX_TRIE.longest_prefix(word) {
- if ENGLISH_WORDS.check(&word[prefix_len..].to_lowercase()) {
+ if ENGLISH_WORDS.contains(&word[prefix_len..].to_lowercase()) {
return true;
}
}
// Check for suffix
if let Some(suffix_len) = SUFFIX_TRIE.longest_suffix(word) {
- if ENGLISH_WORDS.check(&word[..word.len() - suffix_len].to_lowercase()) {
+ if ENGLISH_WORDS.contains(&word[..word.len() - suffix_len].to_lowercase()) {
return true;
}
}
@@ -707,7 +735,7 @@ fn is_likely_english_word(word: &str) -> bool {
let parts: Vec<&str> = word.split('-').collect();
if parts
.iter()
- .all(|part| ENGLISH_WORDS.check(&part.to_lowercase()))
+ .all(|part| ENGLISH_WORDS.contains(&part.to_lowercase()))
{
return true;
}
@@ -716,15 +744,21 @@ fn is_likely_english_word(word: &str) -> bool {
false
}
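The widened affix lists feed the same fallback as before: if the raw word misses the dictionary, strip a known prefix or suffix and re-check the stem. A simplified, self-contained sketch (fixed arrays stand in for the PREFIX_TRIE/SUFFIX_TRIE longest-match lookups):

    use std::collections::HashSet;

    fn is_likely_english(word: &str, dict: &HashSet<String>) -> bool {
        let w = word.to_lowercase();
        if dict.contains(&w) {
            return true;
        }
        // Illustrative affix subsets; the real code consults tries.
        for p in ["un", "re", "non"] {
            if let Some(stem) = w.strip_prefix(p) {
                if dict.contains(stem) {
                    return true;
                }
            }
        }
        for s in ["ing", "ness", "ly"] {
            if let Some(stem) = w.strip_suffix(s) {
                if dict.contains(stem) {
                    return true;
                }
            }
        }
        false
    }

    fn main() {
        let dict = HashSet::from(["happiness".to_string()]);
        assert!(is_likely_english("unhappiness", &dict)); // "un" + happiness
    }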
+#[derive(Debug, Default)]
+pub struct CorrectedQuery {
+ pub query: Option<ParsedQuery>,
+ pub corrected: bool,
+}
+
#[tracing::instrument(skip(redis_pool))]
pub async fn correct_query(
- query: String,
+ query: ParsedQuery,
dataset_id: uuid::Uuid,
redis_pool: web::Data<RedisPool>,
options: &TypoOptions,
-) -> Result<Option<String>, ServiceError> {
+) -> Result<CorrectedQuery, ServiceError> {