TabbyML · leiwen83 · Nov 3, 2023
diff --git a/crates/http-api-bindings/README.md b/crates/http-api-bindings/README.md
@@ -1,21 +1,11 @@
-## Examples
-
-```bash
-export MODEL_ID="code-gecko"
-export PROJECT_ID="$(gcloud config get project)"
-export API_ENDPOINT="https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${MODEL_ID}:predict"
-export AUTHORIZATION="Bearer $(gcloud auth print-access-token)"
-
-cargo run --example simple
-```
-
 ## Usage
 
 ```bash
 export MODEL_ID="code-gecko"
 export PROJECT_ID="$(gcloud config get project)"
 export API_ENDPOINT="https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${MODEL_ID}:predict"
 export AUTHORIZATION="Bearer $(gcloud auth print-access-token)"
+export TABBY_CONFIG=/the_dir_where_your_model_is_located/tabby.json
 
-cargo run serve --device experimental-http --model "{\"kind\": \"vertex-ai\", \"api_endpoint\": \"$API_ENDPOINT\", \"authorization\": \"$AUTHORIZATION\"}"
+cargo run serve --device experimental-http --model "{\"kind\": \"vertex-ai\", \"api_endpoint\": \"$API_ENDPOINT\", \"authorization\": \"$AUTHORIZATION\", \"tabby_config\": \"$TABBY_CONFIG\"}"
 ```
diff --git a/crates/http-api-bindings/src/fastchat.rs b/crates/http-api-bindings/src/fastchat.rs
@@ -8,7 +8,7 @@ use tabby_inference::{helpers, TextGeneration, TextGenerationOptions};
 #[derive(Serialize)]
 struct Request {
     model: String,
-    prompt: Vec<String>,
+    prompt: String,
     max_tokens: usize,
     temperature: f32,
 }
@@ -49,19 +49,14 @@ impl FastChatEngine {
             client,
         }
     }
-
-    pub fn prompt_template() -> String {
-        "{prefix}<MID>{suffix}".to_owned()
-    }
 }
 
 #[async_trait]
 impl TextGeneration for FastChatEngine {
     async fn generate(&self, prompt: &str, options: TextGenerationOptions) -> String {
-        let tokens: Vec<&str> = prompt.split("<MID>").collect();
         let request = Request {
             model: self.model_name.to_owned(),
-            prompt: vec![tokens[0].to_owned()],
+            prompt: prompt.to_string(),
             max_tokens: options.max_decoding_length,
             temperature: options.sampling_temperature,
         };

diff --git a/crates/http-api-bindings/src/lib.rs b/crates/http-api-bindings/src/lib.rs
@@ -9,14 +9,15 @@ use vertex_ai::VertexAIEngine;
 pub fn create(model: &str) -> (Box<dyn TextGeneration>, String) {
     let params = serde_json::from_str(model).expect("Failed to parse model string");
     let kind = get_param(&params, "kind");
+    let metafile = get_param(&params, "tabby_config");
     if kind == "vertex-ai" {
         let api_endpoint = get_param(&params, "api_endpoint");
         let authorization = get_param(&params, "authorization");
         let engine = Box::new(VertexAIEngine::create(
             api_endpoint.as_str(),
             authorization.as_str(),
         ));
-        (engine, VertexAIEngine::prompt_template())
+        (engine, metafile)
     } else if kind == "fastchat" {
         let model_name = get_param(&params, "model_name");
         let api_endpoint = get_param(&params, "api_endpoint");
@@ -26,7 +27,7 @@ pub fn create(model: &str) -> (Box<dyn TextGeneration>, String) {
             model_name.as_str(),
             authorization.as_str(),
         ));
-        (engine, FastChatEngine::prompt_template())
+        (engine, metafile)
     } else {
         panic!("Only vertex_ai and fastchat are supported for http backend");
     }

diff --git a/crates/http-api-bindings/src/vertex_ai.rs b/crates/http-api-bindings/src/vertex_ai.rs
@@ -57,10 +57,6 @@ impl VertexAIEngine {
             client,
         }
     }
-
-    pub fn prompt_template() -> String {
-        "{prefix}<MID>{suffix}".to_owned()
-    }
 }
 
 #[async_trait]

diff --git a/crates/tabby/src/serve/engine.rs b/crates/tabby/src/serve/engine.rs
@@ -33,14 +33,9 @@ pub async fn create_engine(
             )
         }
     } else {
-        let (engine, prompt_template) = http_api_bindings::create(model_id);
-        (
-            engine,
-            EngineInfo {
-                prompt_template: Some(prompt_template),
-                chat_template: None,
-            },
-        )
+        let (engine, metafile) = http_api_bindings::create(model_id);
+        let engine_info = EngineInfo::read(PathBuf::from(metafile));
+        (engine, engine_info)
     }
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -57,10 +57,6 @@ impl VertexAIEngine { @@
                 client,
             }
         }
-        pub fn prompt_template() -> String {
-            "{prefix}<MID>{suffix}".to_owned()
-        }
     }
     #[async_trait]
@@ Expand Down @@