Clippy #41

Merged
15 commits, merged Dec 10, 2024
2 changes: 2 additions & 0 deletions .env.example
@@ -0,0 +1,2 @@
+PHARIA_AI_TOKEN=
+INFERENCE_URL=
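
For local development the two variables in this template are meant to be copied into a `.env` file and filled in. A hypothetical filled-in copy (the token is a placeholder, and the URL shown is simply the instance the CI workflow below targets) might look like:

    PHARIA_AI_TOKEN=<your Pharia AI token>
    INFERENCE_URL=https://inference-api.product.pharia.com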
29 changes: 20 additions & 9 deletions .github/workflows/test.yml
@@ -7,15 +7,26 @@ on:
     branches: [ main ]
 
 jobs:
-  build:
-
+  lints:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+        with:
+          components: rustfmt, clippy
+      - uses: Swatinem/rust-cache@v2
+      - run: cargo fmt -- --check
+      - run: cargo clippy -- -D warnings
 
+  test:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
-      - name: Build
-        run: cargo build
-      - name: Run tests
-        env:
-          AA_API_TOKEN: ${{ secrets.AA_API_TOKEN }}
-        run: cargo test
+      - uses: actions/checkout@v4
+      - uses: Swatinem/rust-cache@v2
+      - name: Build
+        run: cargo build
+      - name: Run tests
+        env:
+          PHARIA_AI_TOKEN: ${{ secrets.PHARIA_AI_TOKEN }}
+          INFERENCE_URL: https://inference-api.product.pharia.com
+        run: cargo test
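
The new lints job can be reproduced locally before pushing. The two cargo invocations are taken verbatim from the workflow, and the rustup line mirrors its `components: rustfmt, clippy` toolchain setup:

    rustup component add rustfmt clippy
    cargo fmt -- --check
    cargo clippy -- -D warnings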
2 changes: 1 addition & 1 deletion .gitignore
@@ -1,4 +1,4 @@
-# Avoid commiting AA_API_TOKEN
+# Avoid commiting PHARIA_AI_TOKEN
 .env
 
 /target
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -13,6 +13,7 @@ categories = ["api-bindings"]
 [dependencies]
 async-stream = "0.3.6"
 base64 = "0.22.0"
+dotenvy = "0.15.7"
 futures-util = "0.3.31"
 image = "0.25.1"
 itertools = "0.13.0"
@@ -26,6 +27,5 @@ tokenizers = { version = "0.21.0", default-features = false, features = [
 ] }
 
 [dev-dependencies]
-dotenv = "0.15.0"
 tokio = { version = "1.37.0", features = ["rt", "macros"] }
 wiremock = "0.6.0"
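
dotenvy, a maintained fork of the older dotenv crate, moves from the dev-dependencies into the regular dependencies because the library itself now loads `.env` files (see `Client::from_env` in src/lib.rs below). A minimal sketch of the call the crate relies on:

    use dotenvy::dotenv;

    fn main() {
        // Load variables from a `.env` file if one exists; ignore the error if it does not.
        let _ = dotenv();
        if let Ok(url) = std::env::var("INFERENCE_URL") {
            println!("inference url: {url}");
        }
    }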
2 changes: 1 addition & 1 deletion README.md
@@ -10,7 +10,7 @@ use aleph_alpha_client::{Client, TaskCompletion, How, Task};
 #[tokio::main]
 fn main() {
     // Authenticate against API. Fetches token.
-    let client = Client::with_authentication("AA_API_TOKEN").unwrap();
+    let client = Client::from_env().unwrap();
 
     // Name of the model we we want to use. Large models give usually better answer, but are also
     // more costly.
6 changes: 3 additions & 3 deletions src/chat.rs
@@ -149,7 +149,7 @@ impl<'a> ChatBody<'a> {
     }
 }
 
-impl<'a> Task for TaskChat<'a> {
+impl Task for TaskChat<'_> {
     type Output = ChatOutput;
 
     type ResponseBody = ResponseChat;
@@ -196,7 +196,7 @@ pub struct ChatEvent {
     pub choices: Vec<ChatStreamChunk>,
 }
 
-impl<'a> StreamTask for TaskChat<'a> {
+impl StreamTask for TaskChat<'_> {
     type Output = ChatStreamChunk;
 
     type ResponseBody = ChatEvent;
@@ -207,7 +207,7 @@ impl<'a> StreamTask for TaskChat<'a> {
         base: &str,
         model: &str,
     ) -> reqwest::RequestBuilder {
-        let body = ChatBody::new(model, &self).with_streaming();
+        let body = ChatBody::new(model, self).with_streaming();
         client.post(format!("{base}/chat/completions")).json(&body)
     }
 
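
The lifetime changes in this file (and the analogous ones in completion.rs, detokenization.rs, http.rs, and stream.rs below) are what Clippy's needless_lifetimes lint suggests on recent toolchains: a lifetime parameter that is only threaded through the impl header can be elided with `'_`. The `&self` to `self` edits address a needless-borrow warning, since `self` is already a reference inside a `&self` method. A minimal sketch on a hypothetical `Wrapper` type, not part of this crate:

    struct Wrapper<'a> {
        inner: &'a str,
    }

    // Before: `impl<'a> Wrapper<'a> { ... }`; Clippy flags `'a` as needless here.
    // After: the anonymous lifetime `'_` stands in for "some lifetime we never name".
    impl Wrapper<'_> {
        fn len(&self) -> usize {
            // Inside a `&self` method, `self` is already `&Wrapper`, so writing `&self`
            // at a call site would only add a second, needless layer of borrowing.
            self.inner.len()
        }
    }

    fn main() {
        let w = Wrapper { inner: "hello" };
        assert_eq!(w.len(), 5);
    }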
4 changes: 2 additions & 2 deletions src/completion.rs
@@ -94,7 +94,7 @@ pub struct Stopping<'a> {
     pub stop_sequences: &'a [&'a str],
 }
 
-impl<'a> Stopping<'a> {
+impl Stopping<'_> {
     /// Only stop once the model reaches its technical limit, usually the context window.
     pub const NO_TOKEN_LIMIT: Self = Stopping {
         maximum_tokens: None,
@@ -270,7 +270,7 @@ impl StreamTask for TaskCompletion<'_> {
         base: &str,
         model: &str,
     ) -> reqwest::RequestBuilder {
-        let body = BodyCompletion::new(model, &self).with_streaming();
+        let body = BodyCompletion::new(model, self).with_streaming();
         client.post(format!("{base}/complete")).json(&body)
     }
 
2 changes: 1 addition & 1 deletion src/detokenization.rs
@@ -34,7 +34,7 @@ impl From<ResponseDetokenization> for DetokenizationOutput {
     }
 }
 
-impl<'a> Task for TaskDetokenization<'a> {
+impl Task for TaskDetokenization<'_> {
     type Output = DetokenizationOutput;
     type ResponseBody = ResponseDetokenization;
 
10 changes: 5 additions & 5 deletions src/http.rs
@@ -65,7 +65,7 @@ pub struct MethodJob<'a, T> {
     pub task: &'a T,
 }
 
-impl<'a, T> Job for MethodJob<'a, T>
+impl<T> Job for MethodJob<'_, T>
 where
     T: Task,
 {
@@ -90,9 +90,9 @@ pub struct HttpClient {
 }
 
 impl HttpClient {
-    /// In production you typically would want set this to <https://api.aleph-alpha.com>. Yet you
-    /// may want to use a different instances for testing.
-    pub fn with_base_url(host: String, api_token: Option<String>) -> Result<Self, Error> {
+    /// In production you typically would want set this to <https://inference-api.pharia.your-company.com>.
+    /// Yet you may want to use a different instance for testing.
+    pub fn new(host: String, api_token: Option<String>) -> Result<Self, Error> {
         let http = ClientBuilder::new().build()?;
 
         Ok(Self {
@@ -139,7 +139,7 @@ impl HttpClient {
     ///
     /// async fn print_completion() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model we we want to use. Large models give usually better answer, but are
     ///     // also slower and more costly.
47 changes: 23 additions & 24 deletions src/lib.rs
@@ -6,7 +6,7 @@
 //! #[tokio::main(flavor = "current_thread")]
 //! async fn main() {
 //!     // Authenticate against API. Fetches token.
-//!     let client = Client::with_authentication("AA_API_TOKEN").unwrap();
+//!     let client = Client::from_env().unwrap();
 //!
 //!     // Name of the model we we want to use. Large models give usually better answer, but are also
 //!     // more costly.
@@ -33,11 +33,12 @@ mod prompt;
 mod semantic_embedding;
 mod stream;
 mod tokenization;
-use std::{pin::Pin, time::Duration};
-
+use dotenvy::dotenv;
 use futures_util::Stream;
 use http::HttpClient;
 use semantic_embedding::{BatchSemanticEmbeddingOutput, SemanticEmbeddingOutput};
+use std::env;
+use std::{pin::Pin, time::Duration};
 use tokenizers::Tokenizer;
 
 pub use self::{
@@ -70,29 +71,27 @@ pub struct Client {
 
 impl Client {
     /// A new instance of an Aleph Alpha client helping you interact with the Aleph Alpha API.
-    /// For "normal" client applications you may likely rather use [`Self::with_authentication`] or
-    /// [`Self::with_base_url`].
+    ///
     /// Setting the token to None allows specifying it on a per request basis.
     /// You may want to only use request based authentication and skip default authentication. This
     /// is useful if writing an application which invokes the client on behalf of many different
     /// users. Having neither request, nor default authentication is considered a bug and will cause
     /// a panic.
-    pub fn new(host: String, api_token: Option<String>) -> Result<Self, Error> {
-        let http_client = HttpClient::with_base_url(host, api_token)?;
+    pub fn new(host: impl Into<String>, api_token: Option<String>) -> Result<Self, Error> {
+        let http_client = HttpClient::new(host.into(), api_token)?;
         Ok(Self { http_client })
     }
 
-    /// Use the Aleph Alpha SaaS offering with your API token for all requests.
-    pub fn with_authentication(api_token: impl Into<String>) -> Result<Self, Error> {
-        Self::with_base_url("https://api.aleph-alpha.com".to_owned(), api_token)
+    /// A client instance that always uses the same token for all requests.
+    pub fn with_auth(host: impl Into<String>, api_token: impl Into<String>) -> Result<Self, Error> {
+        Self::new(host, Some(api_token.into()))
     }
 
-    /// Use your on-premise inference with your API token for all requests.
-    ///
-    /// In production you typically would want set this to <https://api.aleph-alpha.com>. Yet
-    /// you may want to use a different instances for testing.
-    pub fn with_base_url(host: String, api_token: impl Into<String>) -> Result<Self, Error> {
-        Self::new(host, Some(api_token.into()))
+    pub fn from_env() -> Result<Self, Error> {
+        let _ = dotenv();
+        let api_token = env::var("PHARIA_AI_TOKEN").unwrap();
+        let inference_url = env::var("INFERENCE_URL").unwrap();
+        Self::with_auth(inference_url, api_token)
     }
 
     /// Execute a task with the aleph alpha API and fetch its result.
@@ -102,7 +101,7 @@ impl Client {
     ///
     /// async fn print_completion() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model we we want to use. Large models give usually better answer, but are
     ///     // also slower and more costly.
@@ -166,7 +165,7 @@ impl Client {
     ///
     /// async fn print_completion() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model we we want to use. Large models give usually better answer, but are
     ///     // also slower and more costly.
@@ -204,7 +203,7 @@ impl Client {
     ///
     /// async fn print_stream_completion() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model we we want to use. Large models give usually better answer, but are
     ///     // also slower and more costly.
@@ -241,7 +240,7 @@ impl Client {
     ///
     /// async fn print_chat() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of a model that supports chat.
     ///     let model = "pharia-1-llm-7b-control";
@@ -276,7 +275,7 @@ impl Client {
     ///
     /// async fn print_stream_chat() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of a model that supports chat.
     ///     let model = "pharia-1-llm-7b-control";
@@ -312,7 +311,7 @@ impl Client {
     /// use aleph_alpha_client::{Client, How, TaskCompletion, Task, Error, Granularity, TaskExplanation, Stopping, Prompt, Sampling};
     ///
     /// async fn print_explanation() -> Result<(), Error> {
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
    ///
     ///     // Name of the model we we want to use. Large models give usually better answer, but are
     ///     // also slower and more costly.
@@ -356,7 +355,7 @@ impl Client {
     /// use aleph_alpha_client::{Client, Error, How, TaskTokenization};
     ///
     /// async fn tokenize() -> Result<(), Error> {
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model for which we want to tokenize text.
     ///     let model = "luminous-base";
@@ -392,7 +391,7 @@ impl Client {
     /// use aleph_alpha_client::{Client, Error, How, TaskDetokenization};
     ///
     /// async fn detokenize() -> Result<(), Error> {
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Specify the name of the model whose tokenizer was used to generate the input token ids.
     ///     let model = "luminous-base";
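
Taken together, the constructor changes in this file give two ways to build a client. A short sketch assuming the re-exports used by the doc examples above (the host string is simply the instance the CI workflow points at, substitute your own):

    use aleph_alpha_client::{Client, Error};

    fn build_clients() -> Result<(), Error> {
        // Reads PHARIA_AI_TOKEN and INFERENCE_URL, loading a local `.env` file first if present.
        let _from_env = Client::from_env()?;

        // Explicit construction with a fixed host and token.
        let _explicit = Client::with_auth(
            "https://inference-api.product.pharia.com",
            "my-api-token",
        )?;

        Ok(())
    }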
7 changes: 2 additions & 5 deletions src/prompt.rs
@@ -81,16 +81,13 @@ impl<'a> Modality<'a> {
     ///
     /// ```no_run
     /// use aleph_alpha_client::{Client, How, Modality, Prompt, Sampling, Stopping, TaskCompletion, Task};
-    /// use dotenv::dotenv;
+    /// use dotenvy::dotenv;
     /// use std::path::PathBuf;
     ///
     /// #[tokio::main(flavor = "current_thread")]
     /// async fn main() {
     ///     // Create client
-    ///     let _ = dotenv();
-    ///     let aa_api_token = std::env::var("AA_API_TOKEN")
-    ///         .expect("AA_API_TOKEN environment variable must be specified to run demo.");
-    ///     let client = Client::with_authentication(aa_api_token).unwrap();
+    ///     let client = Client::from_env().unwrap();
     ///     // Define task
     ///     let task = TaskCompletion {
     ///         prompt: Prompt::from_vec(vec![
2 changes: 1 addition & 1 deletion src/stream.rs
@@ -50,7 +50,7 @@ pub trait StreamTask {
     }
 }
 
-impl<'a, T> StreamJob for MethodJob<'a, T>
+impl<T> StreamJob for MethodJob<'_, T>
 where
     T: StreamTask,
 {
2 changes: 1 addition & 1 deletion src/tokenization.rs
@@ -14,7 +14,7 @@ pub struct TaskTokenization<'a> {
 }
 
 impl<'a> From<&'a str> for TaskTokenization<'a> {
-    fn from(prompt: &'a str) -> TaskTokenization {
+    fn from(prompt: &str) -> TaskTokenization {
         TaskTokenization {
             prompt,
             tokens: true,