Skip to content

Commit

Permalink
fix: http-api-bindings also needs to read the model's prompt definition
Browse files Browse the repository at this point in the history
If the model's prompt template is not used, the output will not be as expected.

Signed-off-by: Lei Wen <[email protected]>
  • Loading branch information
wenlei03 committed Nov 3, 2023
1 parent e4efcc4 commit c88b8f4
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 25 deletions.
14 changes: 2 additions & 12 deletions crates/http-api-bindings/README.md
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
## Examples

```bash
export MODEL_ID="code-gecko"
export PROJECT_ID="$(gcloud config get project)"
export API_ENDPOINT="https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${MODEL_ID}:predict"
export AUTHORIZATION="Bearer $(gcloud auth print-access-token)"

cargo run --example simple
```

## Usage

```bash
export MODEL_ID="code-gecko"
export PROJECT_ID="$(gcloud config get project)"
export API_ENDPOINT="https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${MODEL_ID}:predict"
export AUTHORIZATION="Bearer $(gcloud auth print-access-token)"
export TABBY_CONFIG=/the_dir_where_your_model_located/tabby.json

cargo run serve --device experimental-http --model "{\"kind\": \"vertex-ai\", \"api_endpoint\": \"$API_ENDPOINT\", \"authorization\": \"$AUTHORIZATION\"}"
cargo run serve --device experimental-http --model "{\"kind\": \"vertex-ai\", \"api_endpoint\": \"$API_ENDPOINT\", \"authorization\": \"$AUTHORIZATION\", \"tabby_config\": \"$TABBY_CONFIG\"}"
```
4 changes: 0 additions & 4 deletions crates/http-api-bindings/src/fastchat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,6 @@ impl FastChatEngine {
client,
}
}

pub fn prompt_template() -> String {
"{prefix}<MID>{suffix}".to_owned()
}
}

#[async_trait]
Expand Down
5 changes: 3 additions & 2 deletions crates/http-api-bindings/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ use vertex_ai::VertexAIEngine;
pub fn create(model: &str) -> (Box<dyn TextGeneration>, String) {
let params = serde_json::from_str(model).expect("Failed to parse model string");
let kind = get_param(&params, "kind");
let metafile = get_param(&params, "tabby_config");
if kind == "vertex-ai" {
let api_endpoint = get_param(&params, "api_endpoint");
let authorization = get_param(&params, "authorization");
let engine = Box::new(VertexAIEngine::create(
api_endpoint.as_str(),
authorization.as_str(),
));
(engine, VertexAIEngine::prompt_template())
(engine, metafile)
} else if kind == "fastchat" {
let model_name = get_param(&params, "model_name");
let api_endpoint = get_param(&params, "api_endpoint");
Expand All @@ -26,7 +27,7 @@ pub fn create(model: &str) -> (Box<dyn TextGeneration>, String) {
model_name.as_str(),
authorization.as_str(),
));
(engine, FastChatEngine::prompt_template())
(engine, metafile)
} else {
panic!("Only vertex_ai and fastchat are supported for http backend");
}
Expand Down
4 changes: 0 additions & 4 deletions crates/http-api-bindings/src/vertex_ai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,6 @@ impl VertexAIEngine {
client,
}
}

pub fn prompt_template() -> String {
"{prefix}<MID>{suffix}".to_owned()
}
}

#[async_trait]
Expand Down
8 changes: 5 additions & 3 deletions crates/tabby/src/serve/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ pub async fn create_engine(
)
}
} else {
let (engine, prompt_template) = http_api_bindings::create(model_id);
let (engine, metafile) = http_api_bindings::create(model);
let metadata: Metadata = serdeconv::from_json_file(&metafile)
.unwrap_or_else(|_| fatal!("Invalid metadata file: {}", metafile));
(
engine,
EngineInfo {
prompt_template: Some(prompt_template),
chat_template: None,
prompt_template: metadata.prompt_template,
chat_template: metadata.chat_template,
},
)
}
Expand Down

0 comments on commit c88b8f4

Please sign in to comment.