Skip to content

Commit

Permalink
feat: support prefix query on name field (#694)
Browse files Browse the repository at this point in the history
* feat: support prefix phrase query on name field

* update changelog
  • Loading branch information
wsxiaoys authored Nov 3, 2023
1 parent acb3a33 commit 2adcc07
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Notice
* llama.cpp backend (CPU, Metal) now requires a redownload of gguf model due to upstream format changes: https://github.com/TabbyML/tabby/pull/645 https://github.com/ggerganov/llama.cpp/pull/3252
* Due to indexing format changes, the `~/.tabby/index` directory must be manually removed before running `tabby scheduler` again.

## Features

Expand Down
9 changes: 8 additions & 1 deletion crates/tabby-common/src/index.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use tantivy::{
tokenizer::{RegexTokenizer, RemoveLongFilter, TextAnalyzer},
tokenizer::{NgramTokenizer, RegexTokenizer, RemoveLongFilter, TextAnalyzer},
Index,
};

Expand All @@ -8,6 +8,7 @@ pub trait IndexExt {
}

/// Name under which the code tokenizer is registered on the index
/// (see `IndexExt::register_tokenizer`).
pub static CODE_TOKENIZER: &str = "code";
/// Name under which the identifier tokenizer is registered on the index.
/// That tokenizer is built with `NgramTokenizer::prefix_only(2, 5)` in
/// `IndexExt::register_tokenizer`.
pub static IDENTIFIER_TOKENIZER: &str = "identifier";

impl IndexExt for Index {
fn register_tokenizer(&self) {
Expand All @@ -16,5 +17,11 @@ impl IndexExt for Index {
.build();

self.tokenizers().register(CODE_TOKENIZER, code_tokenizer);

let identifier_tokenzier =
TextAnalyzer::builder(NgramTokenizer::prefix_only(2, 5).unwrap()).build();

self.tokenizers()
.register(IDENTIFIER_TOKENIZER, identifier_tokenzier);
}
}
11 changes: 9 additions & 2 deletions crates/tabby-scheduler/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::fs;
use anyhow::Result;
use tabby_common::{
config::Config,
index::{IndexExt, CODE_TOKENIZER},
index::{IndexExt, CODE_TOKENIZER, IDENTIFIER_TOKENIZER},
path::index_dir,
SourceFile,
};
Expand All @@ -29,10 +29,17 @@ pub fn index_repositories(_config: &Config) -> Result<()> {
.set_indexing_options(code_indexing_options)
.set_stored();

let name_indexing_options = TextFieldIndexing::default()
.set_tokenizer(IDENTIFIER_TOKENIZER)
.set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions);
let name_options = TextOptions::default()
.set_indexing_options(name_indexing_options)
.set_stored();

let field_git_url = builder.add_text_field("git_url", STRING | STORED);
let field_filepath = builder.add_text_field("filepath", STRING | STORED);
let field_language = builder.add_text_field("language", STRING | STORED);
let field_name = builder.add_text_field("name", STRING | STORED);
let field_name = builder.add_text_field("name", name_options);
let field_kind = builder.add_text_field("kind", STRING | STORED);
let field_body = builder.add_text_field("body", code_options);

Expand Down

0 comments on commit 2adcc07

Please sign in to comment.