diff --git a/pingora-core/src/server/mod.rs b/pingora-core/src/server/mod.rs index d9e57ddc..1e1eb7dc 100644 --- a/pingora-core/src/server/mod.rs +++ b/pingora-core/src/server/mod.rs @@ -76,16 +76,17 @@ pub struct Server { // TODO: delete the pid when exit impl Server { - async fn main_loop(&self) -> ShutdownType { + async fn main_loop(&self) -> (ShutdownType, bool) { // waiting for exit signal // TODO: there should be a signal handling function let mut graceful_upgrade_signal = unix::signal(unix::SignalKind::quit()).unwrap(); let mut graceful_terminate_signal = unix::signal(unix::SignalKind::terminate()).unwrap(); let mut fast_shutdown_signal = unix::signal(unix::SignalKind::interrupt()).unwrap(); + let mut reload_signal = unix::signal(unix::SignalKind::hangup()).unwrap(); tokio::select! { _ = fast_shutdown_signal.recv() => { info!("SIGINT received, exiting"); - ShutdownType::Quick + (ShutdownType::Quick, false) }, _ = graceful_terminate_signal.recv() => { // we receive a graceful terminate, all instances are instructed to stop @@ -99,45 +100,55 @@ impl Server { } } info!("Broadcast graceful shutdown complete"); - ShutdownType::Graceful + (ShutdownType::Graceful, false) } _ = graceful_upgrade_signal.recv() => { - // TODO: still need to select! 
on signals in case a fast shutdown is needed - // aka: move below to another task and only kick it off here info!("SIGQUIT received, sending socks and gracefully exiting"); - if let Some(fds) = &self.listen_fds { - let fds = fds.lock().await; - info!("Trying to send socks"); - // XXX: this is blocking IO - match fds.send_to_sock( - self.configuration.as_ref().upgrade_sock.as_str()) - { - Ok(_) => {info!("listener sockets sent");}, - Err(e) => { - error!("Unable to send listener sockets to new process: {e}"); - // sentry log error on fd send failure - #[cfg(not(debug_assertions))] - sentry::capture_error(&e); - } - } - sleep(Duration::from_secs(CLOSE_TIMEOUT)).await; - info!("Broadcasting graceful shutdown"); - // gracefully exiting - match self.shutdown_watch.send(true) { - Ok(_) => { info!("Graceful shutdown started!"); } - Err(e) => { - error!("Graceful shutdown broadcast failed: {e}"); - // switch to fast shutdown - return ShutdownType::Graceful; - } - } - info!("Broadcast graceful shutdown complete"); - ShutdownType::Graceful - } else { - info!("No socks to send, shutting down."); - ShutdownType::Graceful - } + self.handle_gracefull_upgrade_signal().await; + (ShutdownType::Graceful, false) }, + _ = reload_signal.recv() => { + info!("SIGHUP received, sending socks and gracefully reloading"); + self.handle_gracefull_upgrade_signal().await; + (ShutdownType::Graceful, true) + } + } + } + + async fn handle_gracefull_upgrade_signal(&self) { + // TODO: still need to select! 
on signals in case a fast shutdown is needed + // aka: move below to another task and only kick it off here + if let Some(fds) = &self.listen_fds { + let fds = fds.lock().await; + info!("Trying to send socks"); + // XXX: this is blocking IO + match fds.send_to_sock(self.configuration.as_ref().upgrade_sock.as_str()) { + Ok(_) => { + info!("listener sockets sent"); + } + Err(e) => { + error!("Unable to send listener sockets to new process: {e}"); + // sentry log error on fd send failure + #[cfg(not(debug_assertions))] + sentry::capture_error(&e); + } + } + sleep(Duration::from_secs(CLOSE_TIMEOUT)).await; + info!("Broadcasting graceful shutdown"); + // gracefully exiting + match self.shutdown_watch.send(true) { + Ok(_) => { + info!("Graceful shutdown started!"); + } + Err(e) => { + error!("Graceful shutdown broadcast failed: {e}"); + // switch to fast shutdown + return; + } + } + info!("Broadcast graceful shutdown complete"); + } else { + info!("No socks to send, shutting down."); } } @@ -252,6 +263,22 @@ impl Server { /// When trying to zero downtime upgrade from an older version of the server which is already /// running, this function will try to get all its listening sockets in order to take them over. pub fn bootstrap(&mut self) { + match self.try_bootstrap() { + Ok(true) => { + std::process::exit(0); + } + Ok(false) => {} + Err(_) => { + std::process::exit(1); + } + } + } + + /// Prepare the server to start + /// + /// When trying to zero downtime upgrade from an older version of the server which is already + /// running, this function will try to get all its listening sockets in order to take them over. 
+ pub fn try_bootstrap(&mut self) -> Result<bool> { info!("Bootstrap starting"); debug!("{:#?}", self.options); @@ -261,13 +288,14 @@ impl Server { if self.options.as_ref().map_or(false, |o| o.test) { info!("Server Test passed, exiting"); - std::process::exit(0); + return Ok(true); } // load fds match self.load_fds(self.options.as_ref().map_or(false, |o| o.upgrade)) { Ok(_) => { info!("Bootstrap done"); + Ok(false) } Err(e) => { // sentry log error on fd load failure @@ -275,7 +303,10 @@ impl Server { sentry::capture_error(&e); error!("Bootstrap failed on error: {:?}, exiting.", e); - std::process::exit(1); + Err(Error::explain( + ErrorType::Custom("BootstrapFdLoadError"), + e.desc(), + )) } } } @@ -287,12 +318,25 @@ impl Server { /// /// Note: this function may fork the process for daemonization, so any additional threads created /// before this function will be lost to any service logic once this function is called. - pub fn run_forever(mut self) -> ! { + pub fn run_forever(self) -> ! { + let daemon = self.configuration.daemon; + self.run_server(daemon).unwrap(); + std::process::exit(0) + } + + /// Start the server + /// + /// This function will block forever until the server needs to quit or reload. So this would be the last + /// function to call for this object. + /// + /// Note: this function may fork the process for daemonization, so any additional threads created + /// before this function will be lost to any service logic once this function is called. 
+ pub fn run_server(mut self, enable_daemon: bool) -> Result<bool> { info!("Server starting"); let conf = self.configuration.as_ref(); - if conf.daemon { + if enable_daemon { info!("Daemonizing the server"); fast_timeout::pause_for_fork(); daemonize(&self.configuration); @@ -320,7 +364,7 @@ impl Server { // blocked on main loop so that it runs forever // Only work steal runtime can use block_on() let server_runtime = Server::create_runtime("Server", 1, true); - let shutdown_type = server_runtime.get_handle().block_on(self.main_loop()); + let (shutdown_type, reload) = server_runtime.get_handle().block_on(self.main_loop()); if matches!(shutdown_type, ShutdownType::Graceful) { let exit_timeout = self @@ -359,7 +403,7 @@ impl Server { } } info!("All runtimes exited, exiting now"); - std::process::exit(0) + Ok(reload) } fn create_runtime(name: &str, threads: usize, work_steal: bool) -> Runtime { diff --git a/pingora/examples/server_reload.rs b/pingora/examples/server_reload.rs new file mode 100644 index 00000000..8d70ee94 --- /dev/null +++ b/pingora/examples/server_reload.rs @@ -0,0 +1,87 @@ +use log::{error, info}; +use pingora::protocols::TcpKeepalive; +use pingora::server::configuration::Opt; +use pingora::server::Server; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tokio::signal::unix; + +mod app; +mod service; + +pub fn main() { + env_logger::init(); + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .unwrap(); + + let args_opt = Opt::parse_args(); + + rt.block_on(async move { + let mut reload_signal = unix::signal(unix::SignalKind::hangup()).unwrap(); + let upgrade = Arc::new(AtomicBool::new(args_opt.upgrade)); + let conf_filename = args_opt.conf; + + loop { + let conf_filename = conf_filename.clone(); + let upgrade = upgrade.clone(); + let upgrade_for_store = upgrade.clone(); + let task = tokio::spawn(async move { + let opt = Opt { + conf: conf_filename, 
upgrade: upgrade.load(Ordering::SeqCst), + ..Opt::default() + }; + let opt = Some(opt); + let mut my_server = Server::new(opt).unwrap(); + my_server.try_bootstrap().unwrap(); + + let mut echo_service_http = service::echo::echo_service_http(); + + let mut options = pingora::listeners::TcpSocketOptions::default(); + options.tcp_fastopen = Some(10); + options.tcp_keepalive = Some(TcpKeepalive { + idle: Duration::from_secs(60), + interval: Duration::from_secs(5), + count: 5, + }); + + echo_service_http.add_tcp_with_settings("0.0.0.0:6145", options); + my_server.add_service(echo_service_http); + + let server_task = + tokio::task::spawn_blocking(move || match my_server.run_server(false) { + Ok(reload) => { + info!("Reload: {}", reload); + } + Err(e) => { + error!("Failed to run server: {}", e); + } + }); + server_task.await.unwrap(); + }); + + tokio::select! { + _ = reload_signal.recv() => { + #[cfg(target_os = "linux")] + { + upgrade_for_store.store(true, Ordering::SeqCst); + } + #[cfg(not(target_os = "linux"))] + { + info!("Upgrade is only supported on Linux"); + } + } + _ = task => { + info!("Server task finished"); + break; + } + } + } + }); + rt.shutdown_background(); +}