
[wip] inference service #12

Merged
merged 30 commits on Apr 2, 2024
Changes from 13 commits

30 commits
8237fb9
Refactor core_thread.rs and remove service.rs
jorgeantonio21 Mar 23, 2024
ddbdc1e
Refactor: Moved , , and related types to a separate module
jorgeantonio21 Mar 23, 2024
df7a457
feat: Update dependencies and refactor configuration handling
jorgeantonio21 Mar 25, 2024
919298a
add hugging face client logic
jorgeantonio21 Mar 25, 2024
b8c8edc
feat: Add hf-hub crate version 0.3.2
jorgeantonio21 Mar 25, 2024
9807bc9
refactor: Rename storage_base_path to storage_folder
jorgeantonio21 Mar 26, 2024
ab64ad3
Refactor core.rs and main.rs, introducing tracing for improved debugg…
jorgeantonio21 Mar 26, 2024
7a22bd3
Add tracing-subscriber crate to Cargo.toml
jorgeantonio21 Mar 26, 2024
65f6e01
address PR comments
jorgeantonio21 Mar 26, 2024
1ce0c12
refactor core thread to model thread, to facilitate models running in…
jorgeantonio21 Mar 27, 2024
156b398
remove core, rename core_thread to model_thread, and work on setting …
jorgeantonio21 Mar 27, 2024
6990a12
add model_thread.rs, after renaming
jorgeantonio21 Mar 27, 2024
d52f113
intermediate steps
jorgeantonio21 Mar 27, 2024
618bea8
intermediate steps
jorgeantonio21 Mar 27, 2024
54e0abd
intermediate steps
jorgeantonio21 Mar 27, 2024
e60b586
address new PR comments
jorgeantonio21 Mar 27, 2024
58cfca5
add test to config construction
jorgeantonio21 Mar 27, 2024
1cdb66a
remove unused code
jorgeantonio21 Mar 27, 2024
b56d0b5
remove full dependency of std::sync
jorgeantonio21 Mar 27, 2024
04b6d6c
change to main branch
jorgeantonio21 Mar 28, 2024
d0f6dff
Merge pull request #13 from atoma-network/experiments
jorgeantonio21 Mar 30, 2024
a19817e
add model trait interface and refactor code to be more general
jorgeantonio21 Mar 31, 2024
4a12b71
rename InferenceService to ModelService
jorgeantonio21 Mar 31, 2024
cddb534
simplify code
jorgeantonio21 Mar 31, 2024
f673dea
remove fetch method from ModelTrait
jorgeantonio21 Mar 31, 2024
e403ba9
cargo fmt
jorgeantonio21 Mar 31, 2024
b8a51ac
rename
jorgeantonio21 Mar 31, 2024
6193465
remove unused error fields
jorgeantonio21 Mar 31, 2024
303aedf
removed unused Builder from ModelTrait associated type
jorgeantonio21 Apr 1, 2024
415974f
merge main and resolve conflicts
jorgeantonio21 Apr 2, 2024
7 changes: 7 additions & 0 deletions Cargo.toml
@@ -11,9 +11,16 @@ async-trait = "0.1.78"
candle = { git = "https://github.com/huggingface/candle", package = "candle-core", version = "0.4.2" }
candle-nn = { git = "https://github.com/huggingface/candle", package = "candle-nn", version = "0.4.2" }
candle-transformers = { git = "https://github.com/huggingface/candle", package = "candle-transformers", version = "0.4.2" }
config = "0.14.0"
ed25519-consensus = "2.1.0"
hf-hub = "0.3.2"
serde = "1.0.197"
serde_json = "1.0.114"
rand = "0.8.5"
reqwest = "0.12.1"
thiserror = "1.0.58"
tokenizers = "0.15.2"
tokio = "1.36.0"
toml = "0.8.12"
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
10 changes: 10 additions & 0 deletions atoma-inference/Cargo.toml
@@ -10,9 +10,19 @@ async-trait.workspace = true
candle.workspace = true
candle-nn.workspace = true
candle-transformers.workspace = true
config.workspace = true
ed25519-consensus.workspace = true
hf-hub.workspace = true
reqwest = { workspace = true, features = ["json"] }
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
thiserror.workspace = true
tokenizers.workspace = true
tokio = { workspace = true, features = ["full", "tracing"] }
tracing.workspace = true
tracing-subscriber.workspace = true

[dev-dependencies]
rand.workspace = true
toml.workspace = true

116 changes: 116 additions & 0 deletions atoma-inference/src/apis/hugging_face.rs
@@ -0,0 +1,116 @@
use std::path::PathBuf;

use async_trait::async_trait;
use hf_hub::api::sync::{Api, ApiBuilder};

use crate::models::ModelType;

use super::ApiTrait;

struct FilePaths {
file_paths: Vec<String>,
}

impl ModelType {
fn get_hugging_face_model_path(&self) -> (String, FilePaths) {
match self {
Self::Llama2_7b => (
String::from("meta-llama/Llama-2-7b-hf"),
FilePaths {
file_paths: vec![
"model-00001-of-00002.safetensors".to_string(),
"model-00002-of-00002.safetensors".to_string(),
],
},
),
Self::Mamba3b => (
String::from("state-spaces/mamba-2.8b-hf"),
FilePaths {
file_paths: vec![
"model-00001-of-00003.safetensors".to_string(),
"model-00002-of-00003.safetensors".to_string(),
"model-00003-of-00003.safetensors".to_string(),
],
},
),
Self::Mistral7b => (
String::from("mistralai/Mistral-7B-Instruct-v0.2"),
FilePaths {
file_paths: vec![
"model-00001-of-00003.safetensors".to_string(),
"model-00002-of-00003.safetensors".to_string(),
"model-00003-of-00003.safetensors".to_string(),
],
},
),
Self::Mixtral8x7b => (
String::from("mistralai/Mixtral-8x7B-Instruct-v0.1"),
FilePaths {
file_paths: vec![
"model-00001-of-00019.safetensors".to_string(),
"model-00002-of-00019.safetensors".to_string(),
"model-00003-of-00019.safetensors".to_string(),
"model-00004-of-00019.safetensors".to_string(),
"model-00005-of-00019.safetensors".to_string(),
"model-00006-of-00019.safetensors".to_string(),
"model-00007-of-00019.safetensors".to_string(),
"model-00008-of-00019.safetensors".to_string(),
"model-00009-of-00019.safetensors".to_string(),
"model-000010-of-00019.safetensors".to_string(),
"model-000011-of-00019.safetensors".to_string(),
"model-000012-of-00019.safetensors".to_string(),
"model-000013-of-00019.safetensors".to_string(),
"model-000014-of-00019.safetensors".to_string(),
"model-000015-of-00019.safetensors".to_string(),
"model-000016-of-00019.safetensors".to_string(),
"model-000017-of-00019.safetensors".to_string(),
"model-000018-of-00019.safetensors".to_string(),
"model-000019-of-00019.safetensors".to_string(),
],
},
),
Self::StableDiffusion2 => (
String::from("stabilityai/stable-diffusion-2"),
FilePaths {
file_paths: vec!["768-v-ema.safetensors".to_string()],
},
),
Self::StableDiffusionXl => (
String::from("stabilityai/stable-diffusion-xl-base-1.0"),
FilePaths {
file_paths: vec![
"sd_xl_base_1.0.safetensors".to_string(),
"sd_xl_base_1.0_0.9vae.safetensors".to_string(),
"sd_xl_offset_example-lora_1.0.safetensors".to_string(),
],
},
),
}
}
}

#[async_trait]
impl ApiTrait for Api {
fn create(api_key: String, cache_dir: PathBuf) -> Result<Self, super::ApiError>
where
Self: Sized,
{
Ok(ApiBuilder::new()
.with_progress(true)
.with_token(Some(api_key))
.with_cache_dir(cache_dir)
.build()?)
}

fn fetch(&self, model: ModelType) -> Result<Vec<PathBuf>, super::ApiError> {
let (model_path, files) = model.get_hugging_face_model_path();
let api_repo = self.model(model_path);
let mut path_bufs = Vec::with_capacity(files.file_paths.len());

for file in files.file_paths {
path_bufs.push(api_repo.get(&file)?);
}

Ok(path_bufs)
}
}
29 changes: 29 additions & 0 deletions atoma-inference/src/apis/mod.rs
@@ -0,0 +1,29 @@
pub mod hugging_face;
use hf_hub::api::sync::ApiError as HuggingFaceError;

use std::path::PathBuf;

use thiserror::Error;

use crate::models::ModelType;

#[derive(Debug, Error)]
pub enum ApiError {
#[error("Api Error: `{0}`")]
ApiError(String),
#[error("HuggingFace API error: `{0}`")]
HuggingFaceError(HuggingFaceError),
}

impl From<HuggingFaceError> for ApiError {
fn from(error: HuggingFaceError) -> Self {
Self::HuggingFaceError(error)
}
}

pub trait ApiTrait {
fn fetch(&self, model: ModelType) -> Result<Vec<PathBuf>, ApiError>;
fn create(api_key: String, cache_dir: PathBuf) -> Result<Self, ApiError>
where
Self: Sized;
}
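
For orientation, here is a minimal usage sketch tying the two files above together. It is not part of this PR: the crate path atoma_inference, the token value, and the cache directory are assumptions; only the create and fetch signatures come from the diff.

use std::path::PathBuf;

use hf_hub::api::sync::Api;

use atoma_inference::apis::{ApiError, ApiTrait};
use atoma_inference::models::ModelType;

fn main() -> Result<(), ApiError> {
    // Build an hf-hub client with an access token and a local cache
    // directory (both values here are placeholders).
    let api = Api::create("hf_...".to_string(), PathBuf::from("./model_cache"))?;

    // Download every safetensors shard registered for the model and collect
    // the local paths; hf-hub reuses already-cached files.
    let shard_paths = api.fetch(ModelType::Mistral7b)?;
    for path in shard_paths {
        println!("fetched {}", path.display());
    }
    Ok(())
}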
59 changes: 36 additions & 23 deletions atoma-inference/src/config.rs
@@ -1,59 +1,52 @@
 use std::path::PathBuf;
 
-use crate::{
-    models::ModelType,
-    specs::{HardwareSpec, SoftwareSpec},
-};
+use config::Config;
 use serde::Deserialize;
 
+use crate::{models::ModelType, types::PrecisionBits};
+
 #[derive(Debug, Deserialize)]
 pub struct InferenceConfig {
     api_key: String,
-    hardware_specs: HardwareSpec,
     models: Vec<ModelType>,
-    software_specs: SoftwareSpec,
-    storage_base_path: PathBuf,
+    precision: PrecisionBits,
+    storage_folder: PathBuf,
     tokenizer_file_path: PathBuf,
     tracing: bool,
+    use_kv_cache: Option<bool>,
 }
 
 impl InferenceConfig {
     pub fn new(
         api_key: String,
-        hardware_specs: HardwareSpec,
         models: Vec<ModelType>,
-        software_specs: SoftwareSpec,
-        storage_base_path: PathBuf,
+        precision: PrecisionBits,
+        storage_folder: PathBuf,
         tokenizer_file_path: PathBuf,
         tracing: bool,
+        use_kv_cache: Option<bool>,
     ) -> Self {
         Self {
             api_key,
-            hardware_specs,
             models,
-            software_specs,
-            storage_base_path,
+            precision,
+            storage_folder,
             tokenizer_file_path,
             tracing,
+            use_kv_cache,
         }
     }
 
     pub fn api_key(&self) -> String {
         self.api_key.clone()
     }
 
-    pub fn hardware(&self) -> HardwareSpec {
-        self.hardware_specs.clone()
-    }
-
     pub fn models(&self) -> Vec<ModelType> {
         self.models.clone()
     }
 
-    pub fn software(&self) -> SoftwareSpec {
-        self.software_specs.clone()
-    }
-
-    pub fn storage_base_path(&self) -> PathBuf {
-        self.storage_base_path.clone()
+    pub fn storage_folder(&self) -> PathBuf {
+        self.storage_folder.clone()
     }
 
     pub fn tokenizer_file_path(&self) -> PathBuf {
@@ -63,4 +56,24 @@ impl InferenceConfig {
     pub fn tracing(&self) -> bool {
         self.tracing
     }
+
+    pub fn precision_bits(&self) -> PrecisionBits {
+        self.precision
+    }
+
+    pub fn use_kv_cache(&self) -> Option<bool> {
+        self.use_kv_cache
+    }
+
+    pub fn from_file_path(config_file_path: PathBuf) -> Self {
+        let builder = Config::builder().add_source(config::File::with_name(
+            config_file_path.to_str().as_ref().unwrap(),
+        ));
+        let config = builder
+            .build()
+            .expect("Failed to generate inference configuration file");
+        config
+            .try_deserialize::<Self>()
+            .expect("Failed to generate config file")
+    }
 }
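
Since from_file_path drives configuration loading, a short sketch of the expected on-disk format may help. The TOML below is an assumption reconstructed from the struct fields; in particular, the serialized variant spellings ("Mamba3b", "F16") are guesses, since types.rs is not shown in this diff.

use std::path::PathBuf;

use atoma_inference::config::InferenceConfig;

fn main() {
    // Hypothetical config file; keys mirror InferenceConfig's fields, but the
    // enum variant spellings ("Mamba3b", "F16") are assumptions.
    let toml_contents = r#"
        api_key = "hf_..."
        models = ["Mamba3b"]
        precision = "F16"
        storage_folder = "./storage"
        tokenizer_file_path = "./tokenizer.json"
        tracing = true
        use_kv_cache = true
    "#;
    std::fs::write("inference.toml", toml_contents).expect("failed to write config");

    // from_file_path panics (via expect) on a missing or malformed file.
    let config = InferenceConfig::from_file_path(PathBuf::from("inference.toml"));
    assert!(config.tracing());
    assert_eq!(config.use_kv_cache(), Some(true));
}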
89 changes: 0 additions & 89 deletions atoma-inference/src/core_thread.rs

This file was deleted.

4 changes: 3 additions & 1 deletion atoma-inference/src/lib.rs
@@ -1,6 +1,8 @@
 pub mod config;
-pub mod core_thread;
+pub mod model_thread;
 pub mod models;
 pub mod service;
 pub mod specs;
 pub mod types;
+
+pub mod apis;