diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..4b5a2d3
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,2 @@
+PHARIA_AI_TOKEN=
+INFERENCE_URL=
\ No newline at end of file
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6cb6696..ffbce3c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -7,15 +7,26 @@ on:
     branches: [ main ]
 
 jobs:
-  build:
-
+  lints:
     runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+        with:
+          components: rustfmt, clippy
+      - uses: Swatinem/rust-cache@v2
+      - run: cargo fmt -- --check
+      - run: cargo clippy -- -D warnings
+  test:
+    runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
-    - name: Build
-      run: cargo build
-    - name: Run tests
-      env:
-        AA_API_TOKEN: ${{ secrets.AA_API_TOKEN }}
-      run: cargo test
+      - uses: actions/checkout@v4
+      - uses: Swatinem/rust-cache@v2
+      - name: Build
+        run: cargo build
+      - name: Run tests
+        env:
+          PHARIA_AI_TOKEN: ${{ secrets.PHARIA_AI_TOKEN }}
+          INFERENCE_URL: https://inference-api.product.pharia.com
+        run: cargo test
diff --git a/.gitignore b/.gitignore
index a38be1f..7d72550 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
-# Avoid commiting AA_API_TOKEN
+# Avoid committing PHARIA_AI_TOKEN
 .env
 
 /target
diff --git a/Cargo.toml b/Cargo.toml
index f6ff605..18edf08 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,6 +13,7 @@ categories = ["api-bindings"]
 [dependencies]
 async-stream = "0.3.6"
 base64 = "0.22.0"
+dotenvy = "0.15.7"
 futures-util = "0.3.31"
 image = "0.25.1"
 itertools = "0.13.0"
@@ -26,6 +27,5 @@ tokenizers = { version = "0.21.0", default-features = false, features = [
 ] }
 
 [dev-dependencies]
-dotenv = "0.15.0"
 tokio = { version = "1.37.0", features = ["rt", "macros"] }
 wiremock = "0.6.0"
diff --git a/README.md b/README.md
index ff87da7..370f13b 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ use aleph_alpha_client::{Client, TaskCompletion, How, Task};
 #[tokio::main]
 fn main() {
     // Authenticate against API. Fetches token.
-    let client = Client::with_authentication("AA_API_TOKEN").unwrap();
+    let client = Client::from_env().unwrap();
 
     // Name of the model we we want to use. Large models give usually better answer, but are also
     // more costly.
diff --git a/src/chat.rs b/src/chat.rs
index 35db048..ff413c1 100644
--- a/src/chat.rs
+++ b/src/chat.rs
@@ -149,7 +149,7 @@ impl<'a> ChatBody<'a> {
     }
 }
 
-impl<'a> Task for TaskChat<'a> {
+impl Task for TaskChat<'_> {
     type Output = ChatOutput;
     type ResponseBody = ResponseChat;
 
@@ -196,7 +196,7 @@ pub struct ChatEvent {
     pub choices: Vec<ChatStreamChunk>,
 }
 
-impl<'a> StreamTask for TaskChat<'a> {
+impl StreamTask for TaskChat<'_> {
     type Output = ChatStreamChunk;
     type ResponseBody = ChatEvent;
 
@@ -207,7 +207,7 @@ impl StreamTask for TaskChat<'_> {
         base: &str,
         model: &str,
     ) -> reqwest::RequestBuilder {
-        let body = ChatBody::new(model, &self).with_streaming();
+        let body = ChatBody::new(model, self).with_streaming();
         client.post(format!("{base}/chat/completions")).json(&body)
     }
 
diff --git a/src/completion.rs b/src/completion.rs
index 70301ba..b1aedb5 100644
--- a/src/completion.rs
+++ b/src/completion.rs
@@ -94,7 +94,7 @@ pub struct Stopping<'a> {
     pub stop_sequences: &'a [&'a str],
 }
 
-impl<'a> Stopping<'a> {
+impl Stopping<'_> {
     /// Only stop once the model reaches its technical limit, usually the context window.
     pub const NO_TOKEN_LIMIT: Self = Stopping {
         maximum_tokens: None,
@@ -270,7 +270,7 @@ impl StreamTask for TaskCompletion<'_> {
         base: &str,
         model: &str,
     ) -> reqwest::RequestBuilder {
-        let body = BodyCompletion::new(model, &self).with_streaming();
+        let body = BodyCompletion::new(model, self).with_streaming();
         client.post(format!("{base}/complete")).json(&body)
     }
 
diff --git a/src/detokenization.rs b/src/detokenization.rs
index 644fc10..2a728e8 100644
--- a/src/detokenization.rs
+++ b/src/detokenization.rs
@@ -34,7 +34,7 @@ impl From<ResponseDetokenization> for DetokenizationOutput {
     }
 }
 
-impl<'a> Task for TaskDetokenization<'a> {
+impl Task for TaskDetokenization<'_> {
     type Output = DetokenizationOutput;
     type ResponseBody = ResponseDetokenization;
 
diff --git a/src/http.rs b/src/http.rs
index bccbb76..7f9022f 100644
--- a/src/http.rs
+++ b/src/http.rs
@@ -65,7 +65,7 @@ pub struct MethodJob<'a, T> {
     pub task: &'a T,
 }
 
-impl<'a, T> Job for MethodJob<'a, T>
+impl<T> Job for MethodJob<'_, T>
 where
     T: Task,
 {
@@ -90,9 +90,9 @@ pub struct HttpClient {
 }
 
 impl HttpClient {
-    /// In production you typically would want set this to . Yet you
-    /// may want to use a different instances for testing.
-    pub fn with_base_url(host: String, api_token: Option<String>) -> Result<Self, Error> {
+    /// In production you typically would want to set this to .
+    /// Yet you may want to use a different instance for testing.
+    pub fn new(host: String, api_token: Option<String>) -> Result<Self, Error> {
         let http = ClientBuilder::new().build()?;
 
         Ok(Self {
@@ -139,7 +139,7 @@ impl HttpClient {
     ///
     /// async fn print_completion() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model we we want to use. Large models give usually better answer, but are
     ///     // also slower and more costly.
diff --git a/src/lib.rs b/src/lib.rs
index 6d85f8f..21ed9b5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -6,7 +6,7 @@
 //! #[tokio::main(flavor = "current_thread")]
 //! async fn main() {
 //!     // Authenticate against API. Fetches token.
-//!     let client = Client::with_authentication("AA_API_TOKEN").unwrap();
+//!     let client = Client::from_env().unwrap();
 //!
 //!     // Name of the model we we want to use. Large models give usually better answer, but are also
 //!     // more costly.
@@ -33,11 +33,12 @@ mod prompt;
 mod semantic_embedding;
 mod stream;
 mod tokenization;
-use std::{pin::Pin, time::Duration};
-
+use dotenvy::dotenv;
 use futures_util::Stream;
 use http::HttpClient;
 use semantic_embedding::{BatchSemanticEmbeddingOutput, SemanticEmbeddingOutput};
+use std::env;
+use std::{pin::Pin, time::Duration};
 use tokenizers::Tokenizer;
 
 pub use self::{
@@ -70,29 +71,27 @@ pub struct Client {
 
 impl Client {
     /// A new instance of an Aleph Alpha client helping you interact with the Aleph Alpha API.
-    /// For "normal" client applications you may likely rather use [`Self::with_authentication`] or
-    /// [`Self::with_base_url`].
     ///
+    /// Setting the token to None allows specifying it on a per-request basis.
     /// You may want to only use request based authentication and skip default authentication. This
     /// is useful if writing an application which invokes the client on behalf of many different
     /// users. Having neither request, nor default authentication is considered a bug and will cause
     /// a panic.
-    pub fn new(host: String, api_token: Option<String>) -> Result<Self, Error> {
-        let http_client = HttpClient::with_base_url(host, api_token)?;
+    pub fn new(host: impl Into<String>, api_token: Option<String>) -> Result<Self, Error> {
+        let http_client = HttpClient::new(host.into(), api_token)?;
         Ok(Self { http_client })
     }
 
-    /// Use the Aleph Alpha SaaS offering with your API token for all requests.
-    pub fn with_authentication(api_token: impl Into<String>) -> Result<Self, Error> {
-        Self::with_base_url("https://api.aleph-alpha.com".to_owned(), api_token)
+    /// A client instance that always uses the same token for all requests.
+    pub fn with_auth(host: impl Into<String>, api_token: impl Into<String>) -> Result<Self, Error> {
+        Self::new(host, Some(api_token.into()))
     }
 
-    /// Use your on-premise inference with your API token for all requests.
-    ///
-    /// In production you typically would want set this to . Yet
-    /// you may want to use a different instances for testing.
-    pub fn with_base_url(host: String, api_token: impl Into<String>) -> Result<Self, Error> {
-        Self::new(host, Some(api_token.into()))
+    pub fn from_env() -> Result<Self, Error> {
+        let _ = dotenv();
+        let api_token = env::var("PHARIA_AI_TOKEN").unwrap();
+        let inference_url = env::var("INFERENCE_URL").unwrap();
+        Self::with_auth(inference_url, api_token)
     }
 
     /// Execute a task with the aleph alpha API and fetch its result.
@@ -102,7 +101,7 @@ impl Client {
     ///
     /// async fn print_completion() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model we we want to use. Large models give usually better answer, but are
     ///     // also slower and more costly.
@@ -166,7 +165,7 @@ impl Client {
     ///
     /// async fn print_completion() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model we we want to use. Large models give usually better answer, but are
     ///     // also slower and more costly.
@@ -204,7 +203,7 @@ impl Client {
     ///
     /// async fn print_stream_completion() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model we we want to use. Large models give usually better answer, but are
     ///     // also slower and more costly.
@@ -241,7 +240,7 @@ impl Client {
     ///
     /// async fn print_chat() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of a model that supports chat.
     ///     let model = "pharia-1-llm-7b-control";
@@ -276,7 +275,7 @@ impl Client {
     ///
     /// async fn print_stream_chat() -> Result<(), Error> {
     ///     // Authenticate against API. Fetches token.
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of a model that supports chat.
     ///     let model = "pharia-1-llm-7b-control";
@@ -312,7 +311,7 @@ impl Client {
     /// use aleph_alpha_client::{Client, How, TaskCompletion, Task, Error, Granularity, TaskExplanation, Stopping, Prompt, Sampling};
     ///
     /// async fn print_explanation() -> Result<(), Error> {
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model we we want to use. Large models give usually better answer, but are
     ///     // also slower and more costly.
@@ -356,7 +355,7 @@ impl Client {
     /// use aleph_alpha_client::{Client, Error, How, TaskTokenization};
     ///
     /// async fn tokenize() -> Result<(), Error> {
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Name of the model for which we want to tokenize text.
     ///     let model = "luminous-base";
@@ -392,7 +391,7 @@ impl Client {
     /// use aleph_alpha_client::{Client, Error, How, TaskDetokenization};
     ///
     /// async fn detokenize() -> Result<(), Error> {
-    ///     let client = Client::with_authentication("AA_API_TOKEN")?;
+    ///     let client = Client::from_env()?;
     ///
     ///     // Specify the name of the model whose tokenizer was used to generate the input token ids.
     ///     let model = "luminous-base";
diff --git a/src/prompt.rs b/src/prompt.rs
index c34e17b..21c1188 100644
--- a/src/prompt.rs
+++ b/src/prompt.rs
@@ -81,16 +81,13 @@ impl<'a> Modality<'a> {
     ///
    /// ```no_run
     /// use aleph_alpha_client::{Client, How, Modality, Prompt, Sampling, Stopping, TaskCompletion, Task};
-    /// use dotenv::dotenv;
+    /// use dotenvy::dotenv;
     /// use std::path::PathBuf;
     ///
     /// #[tokio::main(flavor = "current_thread")]
     /// async fn main() {
     ///     // Create client
-    ///     let _ = dotenv();
-    ///     let aa_api_token = std::env::var("AA_API_TOKEN")
-    ///         .expect("AA_API_TOKEN environment variable must be specified to run demo.");
-    ///     let client = Client::with_authentication(aa_api_token).unwrap();
+    ///     let client = Client::from_env().unwrap();
     ///     // Define task
     ///     let task = TaskCompletion {
     ///         prompt: Prompt::from_vec(vec![
diff --git a/src/stream.rs b/src/stream.rs
index 0fe5e52..de9b360 100644
--- a/src/stream.rs
+++ b/src/stream.rs
@@ -50,7 +50,7 @@ pub trait StreamTask {
     }
 }
 
-impl<'a, T> StreamJob for MethodJob<'a, T>
+impl<T> StreamJob for MethodJob<'_, T>
 where
     T: StreamTask,
 {
diff --git a/src/tokenization.rs b/src/tokenization.rs
index e552622..4e5bb4d 100644
--- a/src/tokenization.rs
+++ b/src/tokenization.rs
@@ -14,7 +14,7 @@ pub struct TaskTokenization<'a> {
 }
 
 impl<'a> From<&'a str> for TaskTokenization<'a> {
-    fn from(prompt: &'a str) -> TaskTokenization {
+    fn from(prompt: &str) -> TaskTokenization {
         TaskTokenization {
             prompt,
             tokens: true,
diff --git a/tests/integration.rs b/tests/integration.rs
index 7be5332..3349242 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -1,4 +1,4 @@
-use std::{fs::File, io::BufReader, sync::OnceLock};
+use std::{fs::File, io::BufReader};
 
 use aleph_alpha_client::{
     cosine_similarity, Client, CompletionEvent, Granularity, How, ImageScore, ItemExplanation,
@@ -6,17 +6,27 @@ use aleph_alpha_client::{
     TaskBatchSemanticEmbedding, TaskChat, TaskCompletion, TaskDetokenization, TaskExplanation,
     TaskSemanticEmbedding, TaskTokenization, TextScore,
 };
-use dotenv::dotenv;
+use dotenvy::dotenv;
 use futures_util::StreamExt;
 use image::ImageFormat;
+use std::sync::LazyLock;
 
-fn api_token() -> &'static str {
-    static AA_API_TOKEN: OnceLock<String> = OnceLock::new();
-    AA_API_TOKEN.get_or_init(|| {
+fn pharia_ai_token() -> &'static str {
+    static PHARIA_AI_TOKEN: LazyLock<String> = LazyLock::new(|| {
         drop(dotenv());
-        std::env::var("AA_API_TOKEN")
-            .expect("AA_API_TOKEN environment variable must be specified to run tests.")
-    })
+        std::env::var("PHARIA_AI_TOKEN")
+            .expect("PHARIA_AI_TOKEN environment variable must be specified to run tests.")
+    });
+    &PHARIA_AI_TOKEN
+}
+
+fn inference_url() -> &'static str {
+    static INFERENCE_URL: LazyLock<String> = LazyLock::new(|| {
+        drop(dotenv());
+        std::env::var("INFERENCE_URL")
+            .expect("INFERENCE_URL environment variable must be specified to run tests.")
+    });
+    &INFERENCE_URL
 }
 
 #[tokio::test]
 async fn chat_with_pharia_1_7b_base() {
     let task = TaskChat::with_message(message);
     let model = "pharia-1-llm-7b-control";
 
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
     let response = client.chat(&task, model, &How::default()).await.unwrap();
 
     // Then
@@ -39,7 +49,7 @@ async fn completion_with_luminous_base() {
     let task = TaskCompletion::from_text("Hello").with_maximum_tokens(1);
     let model = "luminous-base";
 
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
     let response = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -53,16 +63,16 @@ async fn completion_with_luminous_base() {
 
 #[tokio::test]
 async fn request_authentication_has_priority() {
-    let bad_aa_api_token = "DUMMY";
+    let bad_pharia_ai_token = "DUMMY";
     let task = TaskCompletion::from_text("Hello").with_maximum_tokens(1);
     let model = "luminous-base";
 
-    let client = Client::with_authentication(bad_aa_api_token).unwrap();
+    let client = Client::with_auth(inference_url(), bad_pharia_ai_token).unwrap();
     let response = client
         .output_of(
             &task.with_model(model),
             &How {
-                api_token: Some(api_token().to_owned()),
+                api_token: Some(pharia_ai_token().to_owned()),
                 ..Default::default()
             },
         )
@@ -82,12 +92,12 @@ async fn authentication_only_per_request() {
     let task = TaskCompletion::from_text("Hello").with_maximum_tokens(1);
 
     // When
-    let client = Client::new("https://api.aleph-alpha.com".to_owned(), None).unwrap();
+    let client = Client::new(inference_url().to_owned(), None).unwrap();
     let response = client
         .output_of(
             &task.with_model(model),
             &How {
-                api_token: Some(api_token().to_owned()),
+                api_token: Some(pharia_ai_token().to_owned()),
                 ..Default::default()
             },
         )
@@ -106,7 +116,7 @@ async fn must_panic_if_authentication_is_missing() {
     let task = TaskCompletion::from_text("Hello").with_maximum_tokens(1);
 
     // When
-    let client = Client::new("https://api.aleph-alpha.com".to_owned(), None).unwrap();
+    let client = Client::new(inference_url().to_owned(), None).unwrap();
     client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -130,7 +140,7 @@ async fn semanitc_search_with_luminous_base() {
         temperature, traditionally in a wood-fired oven.",
     );
     let query = Prompt::from_text("What is Pizza?");
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
 
     // When
     let robot_embedding_task = TaskSemanticEmbedding {
@@ -193,7 +203,7 @@ async fn complete_structured_prompt() {
         sampling: Sampling::MOST_LIKELY,
     };
     let model = "luminous-base";
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
     let response = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -222,7 +232,7 @@ async fn maximum_tokens_none_request() {
         sampling: Sampling::MOST_LIKELY,
     };
     let model = "luminous-base";
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
     let response = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -243,7 +253,7 @@ async fn explain_request() {
         target: " How is it going?",
         granularity: Granularity::default().with_prompt_granularity(PromptGranularity::Sentence),
     };
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
 
     // When
     let response = client
@@ -273,7 +283,7 @@ async fn explain_request_with_auto_granularity() {
         target: " How is it going?",
         granularity: Granularity::default(),
     };
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
 
     // When
     let response = client
@@ -305,7 +315,7 @@ async fn explain_request_with_image_modality() {
         target: " a cat.",
         granularity: Granularity::default().with_prompt_granularity(PromptGranularity::Paragraph),
     };
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
 
     // When
     let response = client
@@ -355,7 +365,7 @@ async fn describe_image_starting_from_a_path() {
         sampling: Sampling::MOST_LIKELY,
     };
     let model = "luminous-base";
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
     let response = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -384,7 +394,7 @@ async fn describe_image_starting_from_a_dyn_image() {
         sampling: Sampling::MOST_LIKELY,
     };
     let model = "luminous-base";
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
     let response = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -410,7 +420,7 @@ async fn only_answer_with_specific_animal() {
         },
     };
     let model = "luminous-base";
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
     let response = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -437,7 +447,7 @@ async fn answer_should_continue() {
         },
     };
     let model = "luminous-base";
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
     let response = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -464,7 +474,7 @@ async fn batch_semanitc_embed_with_luminous_base() {
         temperature, traditionally in a wood-fired oven.",
     );
 
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
 
     // When
     let embedding_task = TaskBatchSemanticEmbedding {
@@ -489,7 +499,7 @@ async fn tokenization_with_luminous_base() {
     // Given
     let input = "Hello, World!";
 
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
 
     // When
     let task1 = TaskTokenization::new(input, false, true);
@@ -526,7 +536,7 @@ async fn detokenization_with_luminous_base() {
     // Given
     let input = vec![49222, 15, 5390, 4];
 
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
 
     // When
     let task = TaskDetokenization { token_ids: &input };
@@ -543,11 +553,11 @@ async fn detokenization_with_luminous_base() {
 #[tokio::test]
 async fn fetch_tokenizer_for_pharia_1_llm_7b() {
     // Given
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
 
     // When
     let tokenizer = client
-        .tokenizer_by_model("Pharia-1-LLM-7B-control", None)
+        .tokenizer_by_model("pharia-1-llm-7b-control", None)
         .await
         .unwrap();
 
@@ -558,7 +568,7 @@ async fn fetch_tokenizer_for_pharia_1_llm_7b() {
 #[tokio::test]
 async fn stream_completion() {
     // Given a streaming completion task
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
     let task = TaskCompletion::from_text("").with_maximum_tokens(7);
 
     // When the events are streamed and collected
@@ -591,7 +601,7 @@ async fn stream_completion() {
 #[tokio::test]
 async fn stream_chat_with_pharia_1_llm_7b() {
     // Given a streaming completion task
-    let client = Client::with_authentication(api_token()).unwrap();
+    let client = Client::with_auth(inference_url(), pharia_ai_token()).unwrap();
     let message = Message::user("Hello,");
     let task = TaskChat::with_messages(vec![message]).with_maximum_tokens(7);
 
diff --git a/tests/unit.rs b/tests/unit.rs
index e29c6a7..c687803 100644
--- a/tests/unit.rs
+++ b/tests/unit.rs
@@ -34,7 +34,7 @@ async fn completion_with_luminous_base() {
     // When
     let task = TaskCompletion::from_text("Hello,").with_maximum_tokens(1);
     let model = "luminous-base";
-    let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
+    let client = Client::with_auth(mock_server.uri(), "dummy-token").unwrap();
     let response = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -74,7 +74,7 @@ async fn detect_rate_limiting() {
     // When
     let task = TaskCompletion::from_text("Hello,").with_maximum_tokens(1);
     let model = "luminous-base";
-    let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
+    let client = Client::with_auth(mock_server.uri(), "dummy-token").unwrap();
     let error = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -118,7 +118,7 @@ async fn detect_queue_full() {
     // When
     let task = TaskCompletion::from_text("Hello,").with_maximum_tokens(1);
     let model = "luminous-base";
-    let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
+    let client = Client::with_auth(mock_server.uri(), "dummy-token").unwrap();
     let error = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -155,7 +155,7 @@ async fn detect_service_unavailable() {
     // When
     let task = TaskCompletion::from_text("Hello,").with_maximum_tokens(1);
     let model = "luminous-base";
-    let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
+    let client = Client::with_auth(mock_server.uri(), "dummy-token").unwrap();
     let error = client
         .output_of(&task.with_model(model), &How::default())
         .await
@@ -177,7 +177,7 @@ async fn be_nice() {
     // When
     let task = TaskCompletion::from_text("Hello,").with_maximum_tokens(1);
     let model = "luminous-base";
-    let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
+    let client = Client::with_auth(mock_server.uri(), "dummy-token").unwrap();
     // Drop result, answer is meaningless anyway
     let _ = client
         .output_of(
@@ -206,7 +206,7 @@ async fn client_timeout() {
         .respond_with(ResponseTemplate::new(StatusCode::OK).set_delay(response_time))
         .mount(&mock_server)
         .await;
-    let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
+    let client = Client::with_auth(mock_server.uri(), "dummy-token").unwrap();
 
     // When
     let result = client
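
Taken together, the patch replaces `with_authentication`/`with_base_url` with `from_env`, `with_auth`, and a token-less `new`, authenticating either once per client or per request via `How`. A minimal sketch of how the reworked constructors might be used downstream, built only from calls visible in this diff; the `completion` field on the output is an assumption not shown in the patch, and the host string simply mirrors the `INFERENCE_URL` used in the CI workflow:

```rust
use aleph_alpha_client::{Client, Error, How, Task, TaskCompletion};

async fn demo() -> Result<(), Error> {
    // Reads PHARIA_AI_TOKEN and INFERENCE_URL, loading a `.env` file via dotenvy if present.
    let client = Client::from_env()?;

    // Alternatively, pass host and token explicitly:
    // let client = Client::with_auth("https://inference-api.product.pharia.com", "my-token")?;

    // Or skip the default token entirely and authenticate per request through `How`.
    let per_request = Client::new("https://inference-api.product.pharia.com".to_owned(), None)?;

    let task = TaskCompletion::from_text("Hello").with_maximum_tokens(1);
    let model = "luminous-base";

    // Default authentication taken from the client itself.
    let output = client
        .output_of(&task.with_model(model), &How::default())
        .await?;
    println!("{}", output.completion); // `completion` field assumed here

    // A request-level token takes priority over whatever the client was built with.
    let output = per_request
        .output_of(
            &task.with_model(model),
            &How {
                api_token: Some("token-for-this-request".to_owned()),
                ..Default::default()
            },
        )
        .await?;
    println!("{}", output.completion);

    Ok(())
}
```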