From 09c0fa70ab529c9f1c252cdc33af17422ffcbe82 Mon Sep 17 00:00:00 2001 From: erhant Date: Mon, 14 Oct 2024 23:49:50 +0300 Subject: [PATCH] feat: `jina` and `serper` API checks (#133) * added `jina` and `serper` checks * tiny typo fixes, update workflows --- Cargo.lock | 49 +++++++++--------- Cargo.toml | 2 +- workflows/Cargo.toml | 1 - workflows/src/apis/jina.rs | 92 +++++++++++++++++++++++++++++++++ workflows/src/apis/mod.rs | 5 ++ workflows/src/apis/serper.rs | 98 ++++++++++++++++++++++++++++++++++++ workflows/src/config.rs | 20 +++++++- workflows/src/lib.rs | 7 +-- 8 files changed, 243 insertions(+), 31 deletions(-) create mode 100644 workflows/src/apis/jina.rs create mode 100644 workflows/src/apis/mod.rs create mode 100644 workflows/src/apis/serper.rs diff --git a/Cargo.lock b/Cargo.lock index 6909c41..e1f0468 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -532,9 +532,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.28" +version = "1.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" +checksum = "b16803a61b81d9eabb7eae2588776c4c1e584b738ede45fdbb4c972cec1e9945" dependencies = [ "shlex", ] @@ -980,7 +980,7 @@ dependencies = [ [[package]] name = "dkn-compute" -version = "0.2.14" +version = "0.2.15" dependencies = [ "async-trait", "base64 0.22.1", @@ -1012,7 +1012,7 @@ dependencies = [ [[package]] name = "dkn-p2p" -version = "0.2.14" +version = "0.2.15" dependencies = [ "env_logger 0.11.5", "eyre", @@ -1024,9 +1024,8 @@ dependencies = [ [[package]] name = "dkn-workflows" -version = "0.2.14" +version = "0.2.15" dependencies = [ - "async-trait", "dotenvy", "env_logger 0.11.5", "eyre", @@ -2250,9 +2249,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "js-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -3373,7 +3372,7 @@ dependencies = [ [[package]] name = "ollama-workflows" version = "0.1.0" -source = "git+https://github.com/andthattoo/ollama-workflows#f1873801b731c287509f36e1966c4f3b28f7fd85" +source = "git+https://github.com/andthattoo/ollama-workflows#2c70764b0f040a78e622811975bcb5c48e056341" dependencies = [ "async-trait", "colored", @@ -4393,9 +4392,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "rw-stream-sink" @@ -5480,9 +5479,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if 1.0.0", "once_cell", @@ -5493,9 +5492,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", @@ -5508,9 +5507,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.43" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" +checksum = 
"cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ "cfg-if 1.0.0", "js-sys", @@ -5520,9 +5519,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5530,9 +5529,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", @@ -5543,9 +5542,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" @@ -5562,9 +5561,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 4665c45..4c0c9b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ default-members = ["compute"] [workspace.package] edition = "2021" -version = "0.2.14" +version = "0.2.15" license = "Apache-2.0" readme = "README.md" diff --git a/workflows/Cargo.toml b/workflows/Cargo.toml index aa71c14..385e95f 
100644 --- a/workflows/Cargo.toml +++ b/workflows/Cargo.toml @@ -14,7 +14,6 @@ ollama-workflows = { git = "https://github.com/andthattoo/ollama-workflows" } # async stuff tokio-util.workspace = true tokio.workspace = true -async-trait.workspace = true # serialize & deserialize serde.workspace = true diff --git a/workflows/src/apis/jina.rs b/workflows/src/apis/jina.rs new file mode 100644 index 0000000..7879fd8 --- /dev/null +++ b/workflows/src/apis/jina.rs @@ -0,0 +1,92 @@ +use eyre::{eyre, Context, Result}; +use reqwest::Client; +use std::env; + +/// Endpoint used for the dummy check: asks Jina to read `example.com`. +const JINA_EXAMPLE_ENDPOINT: &str = "https://r.jina.ai/https://example.com"; +const ENV_VAR_NAME: &str = "JINA_API_KEY"; + +/// Jina-specific configurations. +#[derive(Debug, Clone, Default)] +pub struct JinaConfig { + /// API key, if available; `None` means the Jina check is skipped. + api_key: Option<String>, +} + +impl JinaConfig { + /// Reads the Jina API key from the `JINA_API_KEY` environment variable, if it is set. + pub fn new() -> Self { + Self { + api_key: env::var(ENV_VAR_NAME).ok(), + } + } + + /// Sets the API key for Jina, replacing any key read from the environment. + pub fn with_api_key(mut self, api_key: String) -> Self { + self.api_key = Some(api_key); + self + } + + /// Checks API KEY, and if it exists tries a dummy request; returns `Ok(())` right away when no key is set. + /// Fails if the request cannot be built or sent, or if it gets a client error (4xx) response, e.g. when the API KEY is not authorized. 
+ /// + /// Equivalent cURL is as follows: + /// + /// ```sh + /// curl 'https://r.jina.ai/https://example.com' \ + /// -H "Authorization: Bearer jina_key" + /// ``` + pub async fn check_optional(&self) -> Result<()> { + // the check is optional: without an API key there is nothing to verify + let Some(api_key) = &self.api_key else { + log::debug!("Jina API key not found, skipping Jina check"); + return Ok(()); + }; + log::info!("Jina API key found, checking Jina service"); + + // make a dummy request + let client = Client::new(); + let request = client + .get(JINA_EXAMPLE_ENDPOINT) + .header("Authorization", format!("Bearer {}", api_key)) + .build() + .wrap_err("failed to build request")?; + + let response = client + .execute(request) + .await + .wrap_err("failed to send request")?; + + // only client errors (4xx) fail the check; the response body is attached as error context + if response.status().is_client_error() { + return Err(eyre!("Failed to make Jina request",)) + .wrap_err(response.text().await.unwrap_or_default()); + } + + log::info!("Jina check successful!"); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + #[ignore = "requires Jina API key"] + async fn test_jina_check() { + let _ = dotenvy::dotenv(); + assert!(env::var(ENV_VAR_NAME).is_ok()); + let res = JinaConfig::new().check_optional().await; + assert!(res.is_ok(), "should pass with api key"); + + env::set_var(ENV_VAR_NAME, "i-dont-work"); + let res = JinaConfig::new().check_optional().await; + assert!(res.is_err(), "should fail with bad api key"); + + env::remove_var(ENV_VAR_NAME); + let res = JinaConfig::new().check_optional().await; + assert!(res.is_ok(), "should pass without api key"); + } +} diff --git a/workflows/src/apis/mod.rs b/workflows/src/apis/mod.rs new file mode 100644 index 0000000..f54af16 --- /dev/null +++ b/workflows/src/apis/mod.rs @@ -0,0 +1,5 @@ +mod jina; +pub use jina::JinaConfig; + +mod serper; +pub use serper::SerperConfig; diff --git a/workflows/src/apis/serper.rs b/workflows/src/apis/serper.rs new file mode 100644 index 0000000..56290de --- /dev/null +++ 
b/workflows/src/apis/serper.rs @@ -0,0 +1,98 @@ +use eyre::{eyre, Context, Result}; +use reqwest::Client; +use std::env; + +/// Endpoint used for the dummy check: a Serper search request. +const SERPER_EXAMPLE_ENDPOINT: &str = "https://google.serper.dev/search"; +const ENV_VAR_NAME: &str = "SERPER_API_KEY"; + +/// Serper-specific configurations. +#[derive(Debug, Clone, Default)] +pub struct SerperConfig { + /// API key, if available; `None` means the Serper check is skipped. + api_key: Option<String>, +} + +impl SerperConfig { + /// Reads the Serper API key from the `SERPER_API_KEY` environment variable, if it is set. + pub fn new() -> Self { + Self { + api_key: env::var(ENV_VAR_NAME).ok(), + } + } + + /// Sets the API key for Serper, replacing any key read from the environment. + pub fn with_api_key(mut self, api_key: String) -> Self { + self.api_key = Some(api_key); + self + } + + /// Checks if Serper API KEY exists, and if it does, tries a dummy request; returns `Ok(())` right away when no key is set. + /// Fails if the request cannot be built or sent, or if it gets a client error (4xx) response, e.g. when the API KEY is not authorized. + /// + /// Equivalent cURL is as follows: + /// + /// ```sh + /// curl -X POST 'https://google.serper.dev/search' \ + /// -H 'X-API-KEY: API_KEY' \ + /// -H 'Content-Type: application/json' \ + /// -d '{ + /// "q": "Your search query here" + /// }' + /// ``` + pub async fn check_optional(&self) -> Result<()> { + // the check is optional: without an API key there is nothing to verify + let Some(api_key) = &self.api_key else { + log::debug!("Serper API key not found, skipping Serper check"); + return Ok(()); + }; + log::info!("Serper API key found, checking Serper service"); + + // make a dummy request + let client = Client::new(); + let request = client + .post(SERPER_EXAMPLE_ENDPOINT) + .header("X-API-KEY", api_key) + .header("Content-Type", "application/json") + .body("{\"q\": \"Your search query here\"}") + .build() + .wrap_err("failed to build request")?; + + let response = client + .execute(request) + .await + .wrap_err("failed to send request")?; + + // only client errors (4xx) fail the check; the response body is attached as error context + if response.status().is_client_error() { + return Err(eyre!("Failed to make Serper request",)) + .wrap_err(response.text().await.unwrap_or_default()); + } + + 
log::info!("Serper check successful!"); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + #[ignore = "requires Serper API key"] + async fn test_serper_check() { + let _ = dotenvy::dotenv(); + assert!(env::var(ENV_VAR_NAME).is_ok()); + let res = SerperConfig::new().check_optional().await; + assert!(res.is_ok(), "should pass with api key"); + + env::set_var(ENV_VAR_NAME, "i-dont-work"); + let res = SerperConfig::new().check_optional().await; + assert!(res.is_err(), "should fail with bad api key"); + + env::remove_var(ENV_VAR_NAME); + let res = SerperConfig::new().check_optional().await; + assert!(res.is_ok(), "should pass without api key"); + } +} diff --git a/workflows/src/config.rs b/workflows/src/config.rs index 92c7f36..2124088 100644 --- a/workflows/src/config.rs +++ b/workflows/src/config.rs @@ -1,4 +1,8 @@ -use crate::{split_csv_line, Model, ModelProvider, OllamaConfig, OpenAIConfig}; +use crate::{ + apis::{JinaConfig, SerperConfig}, + providers::{OllamaConfig, OpenAIConfig}, + split_csv_line, Model, ModelProvider, +}; use eyre::{eyre, Result}; use rand::seq::IteratorRandom; // provides Vec<_>.choose @@ -12,6 +16,12 @@ pub struct DriaWorkflowsConfig { /// OpenAI configurations, e.g. API key, in case OpenAI is used. /// Otherwise, can be ignored. pub openai: OpenAIConfig, + /// Serper configurations, e.g. API key, in case Serper is given in environment. + /// Otherwise, can be ignored. + pub serper: SerperConfig, + /// Jina configurations, e.g. API key, in case Jina is used. + /// Otherwise, can be ignored. 
+ pub jina: JinaConfig, } impl DriaWorkflowsConfig { @@ -26,6 +36,8 @@ impl DriaWorkflowsConfig { models: models_and_providers, openai: OpenAIConfig::new(), ollama: OllamaConfig::new(), + serper: SerperConfig::new(), + jina: JinaConfig::new(), } } @@ -167,6 +179,12 @@ impl DriaWorkflowsConfig { pub async fn check_services(&mut self) -> Result<()> { log::info!("Checking configured services."); + // check Serper + self.serper.check_optional().await?; + + // check Jina + self.jina.check_optional().await?; + // TODO: can refactor (provider, model) logic here let unique_providers = self.get_providers(); diff --git a/workflows/src/lib.rs b/workflows/src/lib.rs index 42c6618..2c6302e 100644 --- a/workflows/src/lib.rs +++ b/workflows/src/lib.rs @@ -1,9 +1,10 @@ +mod providers; + +mod apis; + mod utils; pub use utils::split_csv_line; -mod providers; -use providers::{OllamaConfig, OpenAIConfig}; - mod config; pub use config::DriaWorkflowsConfig;