diff --git a/CHANGELOG.md b/CHANGELOG.md index ae2666f75a3f..5f74cbcb34bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Notice * llama.cpp backend (CPU, Metal) now requires a redownload of gguf model due to upstream format changes: https://github.com/TabbyML/tabby/pull/645 https://github.com/ggerganov/llama.cpp/pull/3252 +* Due to indexing format changes, the `~/.tabby/index` directory needs to be manually removed before any further runs of `tabby scheduler`. ## Features diff --git a/crates/tabby-common/src/index.rs b/crates/tabby-common/src/index.rs index 1d726b30ded1..6bf7eec939b1 100644 --- a/crates/tabby-common/src/index.rs +++ b/crates/tabby-common/src/index.rs @@ -1,5 +1,5 @@ use tantivy::{ - tokenizer::{RegexTokenizer, RemoveLongFilter, TextAnalyzer}, + tokenizer::{NgramTokenizer, RegexTokenizer, RemoveLongFilter, TextAnalyzer}, Index, }; @@ -8,6 +8,7 @@ pub trait IndexExt { } pub static CODE_TOKENIZER: &str = "code"; +pub static IDENTIFIER_TOKENIZER: &str = "identifier"; impl IndexExt for Index { fn register_tokenizer(&self) { @@ -16,5 +17,11 @@ impl IndexExt for Index { .build(); self.tokenizers().register(CODE_TOKENIZER, code_tokenizer); + + let identifier_tokenzier = + TextAnalyzer::builder(NgramTokenizer::prefix_only(2, 5).unwrap()).build(); + + self.tokenizers() + .register(IDENTIFIER_TOKENIZER, identifier_tokenzier); } } diff --git a/crates/tabby-scheduler/src/index.rs b/crates/tabby-scheduler/src/index.rs index ce4a17904f08..ba052f036b49 100644 --- a/crates/tabby-scheduler/src/index.rs +++ b/crates/tabby-scheduler/src/index.rs @@ -3,7 +3,7 @@ use std::fs; use anyhow::Result; use tabby_common::{ config::Config, - index::{IndexExt, CODE_TOKENIZER}, + index::{IndexExt, CODE_TOKENIZER, IDENTIFIER_TOKENIZER}, path::index_dir, SourceFile, }; @@ -29,10 +29,17 @@ pub fn index_repositories(_config: &Config) -> Result<()> { .set_indexing_options(code_indexing_options) .set_stored(); + let name_indexing_options = TextFieldIndexing::default() + 
.set_tokenizer(IDENTIFIER_TOKENIZER) + .set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions); + let name_options = TextOptions::default() + .set_indexing_options(name_indexing_options) + .set_stored(); + let field_git_url = builder.add_text_field("git_url", STRING | STORED); let field_filepath = builder.add_text_field("filepath", STRING | STORED); let field_language = builder.add_text_field("language", STRING | STORED); - let field_name = builder.add_text_field("name", STRING | STORED); + let field_name = builder.add_text_field("name", name_options); let field_kind = builder.add_text_field("kind", STRING | STORED); let field_body = builder.add_text_field("body", code_options);