diff --git a/p11nethsm.conf b/p11nethsm.conf index 85a79c3f..927249e1 100644 --- a/p11nethsm.conf +++ b/p11nethsm.conf @@ -15,4 +15,7 @@ slots: danger_insecure_cert: true # sha256_fingerprints: # - "31:92:8E:A4:5E:16:5C:A7:33:44:E8:E9:8E:64:C4:AE:7B:2A:57:E5:77:43:49:F3:69:C9:8F:C4:2F:3A:3B:6E" - # timeout: 10 + retries: + count: 10 + delay_seconds: 1 + timeout: 10 diff --git a/pkcs11/src/api/object.rs b/pkcs11/src/api/object.rs index 19da63bf..bac96258 100644 --- a/pkcs11/src/api/object.rs +++ b/pkcs11/src/api/object.rs @@ -393,7 +393,7 @@ mod tests { device_error: 0, enum_ctx: None, flags: 0, - login_ctx: LoginCtx::new(None, None, vec![]), + login_ctx: LoginCtx::new(None, None, vec![], None), slot_id: 0, }; diff --git a/pkcs11/src/api/token.rs b/pkcs11/src/api/token.rs index 0dd92b8b..8cc3116d 100644 --- a/pkcs11/src/api/token.rs +++ b/pkcs11/src/api/token.rs @@ -87,7 +87,7 @@ pub extern "C" fn C_GetSlotInfo( let mut flags = 0; - let mut login_ctx = LoginCtx::new(None, None, slot.instances.clone()); + let mut login_ctx = LoginCtx::new(None, None, slot.instances.clone(), slot.retries); let result = login_ctx.try_( |conf| default_api::info_get(&conf), @@ -160,7 +160,12 @@ pub extern "C" fn C_GetTokenInfo( return cryptoki_sys::CKR_ARGUMENTS_BAD; } - let mut login_ctx = LoginCtx::new(None, slot.administrator.clone(), slot.instances.clone()); + let mut login_ctx = LoginCtx::new( + None, + slot.administrator.clone(), + slot.instances.clone(), + slot.retries, + ); let result = login_ctx.try_( |conf| default_api::info_get(&conf), diff --git a/pkcs11/src/backend/events.rs b/pkcs11/src/backend/events.rs index 2b373224..df8ece40 100644 --- a/pkcs11/src/backend/events.rs +++ b/pkcs11/src/backend/events.rs @@ -36,7 +36,7 @@ pub fn update_slot_state(slot_id: CK_SLOT_ID, present: bool) { pub fn fetch_slots_state() { for (index, slot) in DEVICE.slots.iter().enumerate() { - let mut login_ctx = LoginCtx::new(None, None, slot.instances.clone()); + let mut login_ctx = LoginCtx::new(None, None, slot.instances.clone(), slot.retries); let status = login_ctx .try_( |conf| default_api::health_state_get(&conf), diff --git a/pkcs11/src/backend/login.rs b/pkcs11/src/backend/login.rs index c34b5e0a..49a5e15e 100644 --- a/pkcs11/src/backend/login.rs +++ b/pkcs11/src/backend/login.rs @@ -3,14 +3,15 @@ use cryptoki_sys::{ CKR_USER_TYPE_INVALID, CKS_RO_PUBLIC_SESSION, CKS_RW_SO_FUNCTIONS, CKS_RW_USER_FUNCTIONS, CKU_CONTEXT_SPECIFIC, CKU_SO, CKU_USER, CK_RV, CK_STATE, CK_USER_TYPE, }; -use log::{debug, error, trace}; +use log::{debug, error, trace, warn}; use nethsm_sdk_rs::{ apis::{self, configuration::Configuration, default_api, ResponseContent}, models::UserRole, ureq, }; +use std::{thread, time::Duration}; -use crate::config::config_file::UserConfig; +use crate::config::config_file::{RetryConfig, UserConfig}; use super::{ApiError, Error}; @@ -21,6 +22,7 @@ pub struct LoginCtx { instances: Vec, index: usize, ck_state: CK_STATE, + retries: Option, } #[derive(Debug, Clone)] @@ -64,6 +66,7 @@ impl LoginCtx { operator: Option, administrator: Option, instances: Vec, + retries: Option, ) -> Self { let mut ck_state = CKS_RO_PUBLIC_SESSION; @@ -81,6 +84,7 @@ impl LoginCtx { operator, administrator, instances, + retries, index: 0, ck_state, } @@ -193,35 +197,53 @@ impl LoginCtx { F: FnOnce(&Configuration) -> Result> + Clone, { // we loop for a maximum of instances.len() times - for _ in 0..self.instances.len() { + 'instances: for _ in 0..self.instances.len() { let conf = match self.get_config_user_mode(&user_mode) { Some(conf) => conf, None => continue, }; - let api_call_clone = api_call.clone(); - match api_call_clone(&conf) { - Ok(result) => return Ok(result), - - // If the server is in an unusable state, try the next one - Err(apis::Error::ResponseError(ResponseContent { status: 500, .. })) - | Err(apis::Error::ResponseError(ResponseContent { status: 501, .. })) - | Err(apis::Error::ResponseError(ResponseContent { status: 502, .. })) - | Err(apis::Error::ResponseError(ResponseContent { status: 503, .. })) - | Err(apis::Error::ResponseError(ResponseContent { status: 412, .. })) => { - continue; - } + let mut retry_count = 0; + let RetryConfig { + count: retry_limit, + delay_seconds, + } = self.retries.unwrap_or(RetryConfig { + count: 1, + delay_seconds: 0, + }); + + loop { + retry_count += 1; + let api_call_clone = api_call.clone(); + match api_call_clone(&conf) { + Ok(result) => return Ok(result), + + // If the server is in an unusable state, try the next one + Err(apis::Error::ResponseError(ResponseContent { status: 500, .. })) + | Err(apis::Error::ResponseError(ResponseContent { status: 501, .. })) + | Err(apis::Error::ResponseError(ResponseContent { status: 502, .. })) + | Err(apis::Error::ResponseError(ResponseContent { status: 503, .. })) + | Err(apis::Error::ResponseError(ResponseContent { status: 412, .. })) => { + continue 'instances; + } - Err(apis::Error::Ureq(ureq::Error::Transport(err))) => { - if matches!( - err.kind(), - ureq::ErrorKind::Io | ureq::ErrorKind::ConnectionFailed - ) { - return Err(ApiError::InstanceRemoved); + Err(apis::Error::Ureq(ureq::Error::Transport(err))) + if matches!( + err.kind(), + ureq::ErrorKind::Io | ureq::ErrorKind::ConnectionFailed + ) => + { + if retry_count == retry_limit { + error!("Retry count exceeded, instance is unreachable: {err}"); + return Err(ApiError::InstanceRemoved); + } + + warn!("IO error connecting to the instance, {err}, retrying in {delay_seconds}s"); + thread::sleep(Duration::from_secs(delay_seconds)); } + // Otherwise, return the error + Err(err) => return Err(err.into()), } - // Otherwise, return the error - Err(err) => return Err(err.into()), } } Err(ApiError::NoInstance) diff --git a/pkcs11/src/backend/session.rs b/pkcs11/src/backend/session.rs index a240611a..39d4a9fa 100644 --- a/pkcs11/src/backend/session.rs +++ b/pkcs11/src/backend/session.rs @@ -95,6 +95,7 @@ impl SessionManager { 0, Arc::new(Slot { administrator: None, + retries: None, db: Arc::new(Mutex::new(Db::new())), description: None, instances: vec![], @@ -125,6 +126,7 @@ impl Session { slot.operator.clone(), slot.administrator.clone(), slot.instances.clone(), + slot.retries, ); Self { diff --git a/pkcs11/src/config/config_file.rs b/pkcs11/src/config/config_file.rs index 8f8371b1..3aa013fe 100644 --- a/pkcs11/src/config/config_file.rs +++ b/pkcs11/src/config/config_file.rs @@ -87,6 +87,12 @@ pub struct P11Config { pub slots: Vec, } +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct RetryConfig { + pub count: u32, + pub delay_seconds: u64, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct InstanceConfig { pub url: String, @@ -104,7 +110,9 @@ pub struct SlotConfig { pub description: Option, pub instances: Vec, #[serde(default)] - pub timeout: Option, + pub retries: Option, + #[serde(default)] + pub timeout_seconds: Option, } // An user diff --git a/pkcs11/src/config/device.rs b/pkcs11/src/config/device.rs index 0c2b3048..977d073d 100644 --- a/pkcs11/src/config/device.rs +++ b/pkcs11/src/config/device.rs @@ -4,7 +4,7 @@ use nethsm_sdk_rs::apis::configuration::Configuration; use crate::backend::db::Db; -use super::config_file::UserConfig; +use super::config_file::{RetryConfig, UserConfig}; // stores the global configuration of the module #[derive(Debug, Clone)] @@ -22,6 +22,7 @@ pub struct ClusterInstance { #[derive(Debug, Clone)] pub struct Slot { pub label: String, + pub retries: Option, pub description: Option, pub instances: Vec, pub operator: Option, diff --git a/pkcs11/src/config/initialization.rs b/pkcs11/src/config/initialization.rs index 17981f34..b64f53e7 100644 --- a/pkcs11/src/config/initialization.rs +++ b/pkcs11/src/config/initialization.rs @@ -125,7 +125,7 @@ fn slot_from_config(slot: &SlotConfig) -> Result { .max_idle_connections(2) .max_idle_connections_per_host(2); - if let Some(t) = slot.timeout { + if let Some(t) = slot.timeout_seconds { builder = builder.timeout(Duration::from_secs(t)); } @@ -147,6 +147,7 @@ fn slot_from_config(slot: &SlotConfig) -> Result { instances, administrator: slot.administrator.clone(), operator: slot.operator.clone(), + retries: slot.retries, db: Arc::new(Mutex::new(crate::backend::db::Db::new())), }) }