diff --git a/pg_search/src/api/index.rs b/pg_search/src/api/index.rs
index 8a847a393..18b1c3fd7 100644
--- a/pg_search/src/api/index.rs
+++ b/pg_search/src/api/index.rs
@@ -235,6 +235,33 @@ pub fn fuzzy_term(
     }
 }
 
+/// Builds a `SearchQueryInput::FuzzyPhrase` query from its SQL arguments.
+///
+/// The optional arguments are forwarded as given; `None` (SQL `NULL`) leaves the
+/// choice of default to the query builder.
+///
+/// # Panics
+///
+/// Panics if `distance` is outside `0..=255` instead of silently wrapping it into a `u8`.
+#[pg_extern(immutable, parallel_safe)]
+pub fn fuzzy_phrase(
+    field: String,
+    value: String,
+    distance: default!(Option<i32>, "NULL"),
+    transposition_cost_one: default!(Option<bool>, "NULL"),
+    prefix: default!(Option<bool>, "NULL"),
+    match_all_terms: default!(Option<bool>, "NULL"),
+) -> SearchQueryInput {
+    SearchQueryInput::FuzzyPhrase {
+        field,
+        value,
+        distance: distance.map(|n| u8::try_from(n).expect("distance must be between 0 and 255")),
+        transposition_cost_one,
+        prefix,
+        match_all_terms,
+    }
+}
+
 #[pg_extern(name = "more_like_this", immutable, parallel_safe)]
 pub fn more_like_this_empty() -> SearchQueryInput {
     panic!("more_like_this must be called with either with_document_id or with_document_fields");
diff --git a/pg_search/src/query/mod.rs b/pg_search/src/query/mod.rs
index ed7ecb21d..9b79df339 100644
--- a/pg_search/src/query/mod.rs
+++ b/pg_search/src/query/mod.rs
@@ -73,6 +73,16 @@ pub enum SearchQueryInput {
         transposition_cost_one: Option<bool>,
         prefix: Option<bool>,
     },
+    /// Fuzzy-matches each token of `value` against `field`; options left as
+    /// `None` are given defaults when the query is built.
+    FuzzyPhrase {
+        field: String,
+        value: String,
+        distance: Option<u8>,
+        transposition_cost_one: Option<bool>,
+        prefix: Option<bool>,
+        match_all_terms: Option<bool>,
+    },
     MoreLikeThis {
         min_doc_frequency: Option<u64>,
         max_doc_frequency: Option<u64>,
@@ -317,6 +327,54 @@ impl SearchQueryInput {
                     )))
                 }
             }
+            Self::FuzzyPhrase {
+                field,
+                value,
+                distance,
+                transposition_cost_one,
+                prefix,
+                match_all_terms,
+            } => {
+                // Defaults used for every option the caller left unset.
+                let distance = distance.unwrap_or(2);
+                let transposition_cost_one = transposition_cost_one.unwrap_or(true);
+                let match_all_terms = match_all_terms.unwrap_or(false);
+                let prefix = prefix.unwrap_or(false);
+
+                let field = field_lookup
+                    .as_str(&field)
+                    .ok_or_else(|| QueryError::WrongFieldType(field.clone()))?;
+
+                // Run `value` through the field's own tokenizer; each token
+                // becomes its own fuzzy term query.
+                let mut analyzer = searcher.index().tokenizer_for_field(field)?;
+                let mut stream = analyzer.token_stream(&value);
+                let mut terms = Vec::new();
+
+                while stream.advance() {
+                    let token = stream.token().text.clone();
+                    let term = Term::from_field_text(field, &token);
+                    let term_query: Box<dyn Query> = if prefix {
+                        Box::new(FuzzyTermQuery::new_prefix(
+                            term,
+                            distance,
+                            transposition_cost_one,
+                        ))
+                    } else {
+                        Box::new(FuzzyTermQuery::new(term, distance, transposition_cost_one))
+                    };
+                    // `Must` requires every token to match; `Should` accepts any of them.
+                    let occur = if match_all_terms {
+                        Occur::Must
+                    } else {
+                        Occur::Should
+                    };
+
+                    terms.push((occur, term_query));
+                }
+
+                Ok(Box::new(BooleanQuery::new(terms)))
+            }
             Self::MoreLikeThis {
                 min_doc_frequency,
                 max_doc_frequency,
diff --git a/pg_search/tests/query.rs b/pg_search/tests/query.rs
index ba6085dfd..cc8392982 100644
--- a/pg_search/tests/query.rs
+++ b/pg_search/tests/query.rs
@@ -45,7 +45,7 @@ fn boolean_tree(mut conn: PgConnection) {
 }
 
 #[rstest]
-fn fuzzy_fields(mut conn: PgConnection) {
+fn fuzzy_term(mut conn: PgConnection) {
     SimpleProductsTable::setup().execute(&mut conn);
     let columns: SimpleProductsTableVec = r#"
     SELECT * FROM bm25_search.search(
@@ -995,3 +995,35 @@ fn more_like_this_timetz_key(mut conn: PgConnection) {
     .fetch_collect(&mut conn);
     assert_eq!(rows.len(), 2);
 }
+
+#[rstest]
+fn fuzzy_phrase(mut conn: PgConnection) {
+    SimpleProductsTable::setup().execute(&mut conn);
+
+    // With the defaults, a row matches when any term of the phrase fuzzy-matches.
+    let columns: SimpleProductsTableVec = r#"
+    SELECT * FROM bm25_search.search(
+        query => paradedb.fuzzy_phrase(field => 'description', value => 'ruling shoeez'),
+        stable_sort => true
+    )"#
+    .fetch_collect(&mut conn);
+    assert_eq!(columns.id, vec![3, 4, 5]);
+
+    // `match_all_terms` narrows the result to rows where every term fuzzy-matches.
+    let columns: SimpleProductsTableVec = r#"
+    SELECT * FROM bm25_search.search(
+        query => paradedb.fuzzy_phrase(field => 'description', value => 'ruling shoeez', match_all_terms => true),
+        stable_sort => true
+    )"#
+    .fetch_collect(&mut conn);
+    assert_eq!(columns.id, vec![3]);
+
+    // A smaller edit distance no longer tolerates these misspellings.
+    let columns: SimpleProductsTableVec = r#"
+    SELECT * FROM bm25_search.search(
+        query => paradedb.fuzzy_phrase(field => 'description', value => 'ruling shoeez', distance => 1),
+        stable_sort => true
+    )"#
+    .fetch_collect(&mut conn);
+    assert_eq!(columns.id.len(), 0);
+}