Skip to content

Commit

Permalink
feat(fulltext_index): allow enabling full-text index via SQL and gRPC (
Browse files Browse the repository at this point in the history
#4310)

* feat(fulltext_index): allow enabling full-text index via SQL and gRPC

Signed-off-by: Zhenchi <[email protected]>

* fix: typo

Signed-off-by: Zhenchi <[email protected]>

* chore: polish

Signed-off-by: Zhenchi <[email protected]>

* fix: test_fulltext_intm_path

Signed-off-by: Zhenchi <[email protected]>

* address comments

Signed-off-by: Zhenchi <[email protected]>

* refactor: explicitly build column options

Signed-off-by: Zhenchi <[email protected]>

* test: fix error msg

Signed-off-by: Zhenchi <[email protected]>

* fix: address comments

Signed-off-by: Zhenchi <[email protected]>

* fix: polish

Signed-off-by: Zhenchi <[email protected]>

---------

Signed-off-by: Zhenchi <[email protected]>
  • Loading branch information
zhongzc authored Jul 8, 2024
1 parent 81308b9 commit 0030821
Show file tree
Hide file tree
Showing 44 changed files with 1,053 additions and 255 deletions.
23 changes: 10 additions & 13 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ etcd-client = { git = "https://github.com/MichaelScofield/etcd-client.git", rev
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a70a6af9c69e40f9a918936a48717343402b4393" }
greptime-proto = { git = "https://github.com/zhongzc/greptime-proto.git", branch = "zhongzc/fulltext-options" }
humantime = "2.1"
humantime-serde = "1.1"
itertools = "0.10"
Expand Down
1 change: 1 addition & 0 deletions src/api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ datatypes.workspace = true
greptime-proto.workspace = true
paste = "1.0"
prost.workspace = true
serde_json.workspace = true
snafu.workspace = true

[build-dependencies]
Expand Down
12 changes: 11 additions & 1 deletion src/api/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,23 @@ pub enum Error {
location: Location,
source: datatypes::error::Error,
},

#[snafu(display("Failed to serialize JSON"))]
SerializeJson {
#[snafu(source)]
error: serde_json::Error,
#[snafu(implicit)]
location: Location,
},
}

impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::UnknownColumnDataType { .. } => StatusCode::InvalidArguments,
Error::IntoColumnDataType { .. } => StatusCode::Unexpected,
Error::IntoColumnDataType { .. } | Error::SerializeJson { .. } => {
StatusCode::Unexpected
}
Error::ConvertColumnDefaultConstraint { source, .. }
| Error::InvalidColumnDefaultConstraint { source, .. } => source.status_code(),
}
Expand Down
1 change: 1 addition & 0 deletions src/api/src/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1843,6 +1843,7 @@ mod tests {
null_mask: vec![2],
datatype: ColumnDataType::Boolean as i32,
datatype_extension: None,
options: None,
};
assert!(is_column_type_value_eq(
column1.datatype,
Expand Down
2 changes: 2 additions & 0 deletions src/api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#![feature(let_chains)]

pub mod error;
pub mod helper;

Expand Down
160 changes: 150 additions & 10 deletions src/api/src/v1/column_def.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,19 @@

use std::collections::HashMap;

use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, COMMENT_KEY};
use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, FulltextOptions, COMMENT_KEY, FULLTEXT_KEY,
};
use snafu::ResultExt;

use crate::error::{self, Result};
use crate::helper::ColumnDataTypeWrapper;
use crate::v1::ColumnDef;
use crate::v1::{ColumnDef, ColumnOptions, SemanticType};

/// Key under which fulltext options are stored in the `options` map of a gRPC `ColumnOptions`.
///
/// The value kept under this key is a string; `options_from_fulltext` produces it by
/// JSON-serializing a `FulltextOptions`.
const FULLTEXT_GRPC_KEY: &str = "fulltext";

/// Tries to construct a `ColumnSchema` from the given `ColumnDef`.
pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
let data_type = ColumnDataTypeWrapper::try_new(
column_def.data_type,
Expand All @@ -43,13 +49,147 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
if !column_def.comment.is_empty() {
metadata.insert(COMMENT_KEY.to_string(), column_def.comment.clone());
}
if let Some(options) = column_def.options.as_ref()
&& let Some(fulltext) = options.options.get(FULLTEXT_GRPC_KEY)
{
metadata.insert(FULLTEXT_KEY.to_string(), fulltext.to_string());
}

ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
.with_metadata(metadata)
.with_time_index(column_def.semantic_type() == SemanticType::Timestamp)
.with_default_constraint(constraint)
.context(error::InvalidColumnDefaultConstraintSnafu {
column: &column_def.name,
})
}

/// Builds the gRPC `ColumnOptions` that correspond to the given `ColumnSchema`.
///
/// The fulltext entry found under `FULLTEXT_KEY` in the schema metadata, if any, is copied
/// into the options map under `FULLTEXT_GRPC_KEY`. Returns `None` when no option was
/// collected, i.e. the resulting options map is empty.
pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<ColumnOptions> {
    let fulltext_entry = column_schema
        .metadata()
        .get(FULLTEXT_KEY)
        .map(|fulltext| (FULLTEXT_GRPC_KEY.to_string(), fulltext.to_string()));

    let mut column_options = ColumnOptions::default();
    // `Option` iterates over zero or one item, so this inserts at most one entry.
    column_options.options.extend(fulltext_entry);

    if column_options.options.is_empty() {
        None
    } else {
        Some(column_options)
    }
}

/// Reports whether the given `ColumnOptions` carries a fulltext entry.
///
/// Returns `false` when `options` is `None` or when its map has no `FULLTEXT_GRPC_KEY` key.
pub fn contains_fulltext(options: &Option<ColumnOptions>) -> bool {
    match options {
        Some(column_options) => column_options.options.contains_key(FULLTEXT_GRPC_KEY),
        None => false,
    }
}

/// Tries to construct a `ColumnOptions` from the given `FulltextOptions`.
pub fn options_from_fulltext(fulltext: &FulltextOptions) -> Result<Option<ColumnOptions>> {
let mut options = ColumnOptions::default();

let v = serde_json::to_string(fulltext).context(error::SerializeJsonSnafu)?;
options.options.insert(FULLTEXT_GRPC_KEY.to_string(), v);

Ok((!options.options.is_empty()).then_some(options))
}

#[cfg(test)]
mod tests {

use datatypes::data_type::ConcreteDataType;
use datatypes::schema::FulltextAnalyzer;

use super::*;
use crate::v1::ColumnDataType;

/// A `ColumnDef` carrying a default value, a comment and fulltext options must be converted
/// into a `ColumnSchema` that exposes all of them.
#[test]
fn test_try_as_column_schema() {
    let fulltext_options = ColumnOptions {
        options: HashMap::from([(
            FULLTEXT_GRPC_KEY.to_string(),
            "{\"enable\":true}".to_string(),
        )]),
    };
    let column_def = ColumnDef {
        name: "test".to_string(),
        data_type: ColumnDataType::String as i32,
        is_nullable: true,
        default_constraint: ColumnDefaultConstraint::Value("test_default".into())
            .try_into()
            .unwrap(),
        semantic_type: SemanticType::Field as i32,
        comment: "test_comment".to_string(),
        datatype_extension: None,
        options: Some(fulltext_options),
    };

    let column_schema = try_as_column_schema(&column_def).unwrap();

    // Basic attributes.
    assert_eq!(column_schema.name, "test");
    assert_eq!(column_schema.data_type, ConcreteDataType::string_datatype());
    assert!(!column_schema.is_time_index());
    assert!(column_schema.is_nullable());

    // Default constraint and comment metadata.
    let expected_default = ColumnDefaultConstraint::Value("test_default".into());
    assert_eq!(
        column_schema.default_constraint().unwrap(),
        &expected_default
    );
    assert_eq!(
        column_schema.metadata().get(COMMENT_KEY).unwrap(),
        "test_comment"
    );

    // Fulltext options parsed back from the gRPC column options.
    let expected_fulltext = FulltextOptions {
        enable: true,
        ..Default::default()
    };
    assert_eq!(
        column_schema.fulltext_options().unwrap().unwrap(),
        expected_fulltext
    );
}

/// A schema without fulltext metadata yields no options; a schema with fulltext options
/// yields the JSON-encoded options under `FULLTEXT_GRPC_KEY`.
#[test]
fn test_options_from_column_schema() {
    // No fulltext metadata: nothing to export.
    let plain_schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true);
    assert!(options_from_column_schema(&plain_schema).is_none());

    // Fulltext metadata present: exported as a JSON string.
    let fulltext = FulltextOptions {
        enable: true,
        analyzer: FulltextAnalyzer::English,
        case_sensitive: false,
    };
    let fulltext_schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true)
        .with_fulltext_options(fulltext)
        .unwrap();
    let column_options = options_from_column_schema(&fulltext_schema).unwrap();
    assert_eq!(
        column_options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
        "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}"
    );
}

/// `options_from_fulltext` must store the JSON-serialized options under `FULLTEXT_GRPC_KEY`.
#[test]
fn test_options_with_fulltext() {
    let column_options = options_from_fulltext(&FulltextOptions {
        enable: true,
        analyzer: FulltextAnalyzer::English,
        case_sensitive: false,
    })
    .unwrap()
    .unwrap();

    let serialized = column_options.options.get(FULLTEXT_GRPC_KEY).unwrap();
    assert_eq!(
        serialized,
        "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}"
    );
}

Ok(
ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
.with_default_constraint(constraint)
.context(error::InvalidColumnDefaultConstraintSnafu {
column: &column_def.name,
})?
.with_metadata(metadata),
)
/// `contains_fulltext` is true only when the options map holds the fulltext key.
#[test]
fn test_contains_fulltext() {
    // Options carrying the fulltext key.
    let with_fulltext = Some(ColumnOptions {
        options: HashMap::from([(
            FULLTEXT_GRPC_KEY.to_string(),
            "{\"enable\":true}".to_string(),
        )]),
    });
    assert!(contains_fulltext(&with_fulltext));

    // Options present but empty.
    let empty_options = Some(ColumnOptions {
        options: HashMap::new(),
    });
    assert!(!contains_fulltext(&empty_options));

    // No options at all.
    assert!(!contains_fulltext(&None));
}
}
1 change: 1 addition & 0 deletions src/common/grpc-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ common-macro.workspace = true
common-query.workspace = true
common-time.workspace = true
datatypes.workspace = true
prost.workspace = true
snafu.workspace = true
table.workspace = true

Expand Down
24 changes: 24 additions & 0 deletions src/common/grpc-expr/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use std::any::Any;

use api::v1::ColumnDataType;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
Expand Down Expand Up @@ -104,6 +105,25 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Unknown proto column datatype: {}", datatype))]
UnknownColumnDataType {
datatype: i32,
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: prost::DecodeError,
},

#[snafu(display(
"Fulltext index only supports string type, column: {column_name}, unexpected type: {column_type:?}"
))]
InvalidFulltextColumnType {
column_name: String,
column_type: ColumnDataType,
#[snafu(implicit)]
location: Location,
},
}

pub type Result<T> = std::result::Result<T, Error>;
Expand All @@ -124,6 +144,10 @@ impl ErrorExt for Error {
Error::UnexpectedValuesLength { .. } | Error::UnknownLocationType { .. } => {
StatusCode::InvalidArguments
}

Error::UnknownColumnDataType { .. } | Error::InvalidFulltextColumnType { .. } => {
StatusCode::InvalidArguments
}
}
}

Expand Down
1 change: 1 addition & 0 deletions src/common/grpc-expr/src/insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ mod tests {
scale: 10,
})),
}),
options: None,
};

(
Expand Down
Loading

0 comments on commit 0030821

Please sign in to comment.