diff --git a/drv/cpu-seq-api/src/lib.rs b/drv/cpu-seq-api/src/lib.rs index 8ab006e09..4dc3ba20b 100644 --- a/drv/cpu-seq-api/src/lib.rs +++ b/drv/cpu-seq-api/src/lib.rs @@ -9,6 +9,7 @@ use counters::Count; use derive_idol_err::IdolError; use userlib::{sys_send, FromPrimitive}; +use zerocopy::AsBytes; // Re-export PowerState for client convenience. pub use drv_cpu_power_state::PowerState; @@ -31,6 +32,36 @@ pub enum SeqError { ServerRestarted, } +#[derive(Copy, Clone, Debug, FromPrimitive, Eq, PartialEq, AsBytes, Count)] +#[repr(u8)] +pub enum StateChangeReason { + /// No reason was provided. + /// + /// This indicates a legacy caller of `Sequencer.set_state`, rather than + /// `Sequencer.set_state_with_reason`. All Hubris-internal callers should + /// use `set_state_with_reason`, so this variant generally indicates that + /// the `Sequencer.set_state` IPC is being called via Hiffy. + Other = 1, + /// The system has just received power, so the sequencer has booted the + /// host CPU. + InitialPowerOn, + /// A power state change was requested by the control plane. + ControlPlane, + /// The host CPU reset while in A0, so the system has powered off to clear + /// hidden core state. + CpuReset, + /// The host OS failed to boot, so the system has powered off. + HostBootFailure, + /// The host OS panicked. + HostPanic, + /// The host OS requested that the system power off without rebooting. + HostPowerOff, + /// The host OS requested that the system reboot. + HostReboot, + /// The system powered off because a component has overheated. + Overheat, +} + // On Gimlet, we have two banks of up to 8 DIMMs apiece. Export the "two banks" // bit of knowledge here so it can be used by gimlet-seq-server, spd, and // packrat, all of which want to know at compile-time how many banks there are. diff --git a/drv/gimlet-seq-server/src/main.rs b/drv/gimlet-seq-server/src/main.rs index 0c3776e96..2e66c25b5 100644 --- a/drv/gimlet-seq-server/src/main.rs +++ b/drv/gimlet-seq-server/src/main.rs @@ -17,7 +17,7 @@ use userlib::{ sys_set_timer, task_slot, units, RecvMessage, TaskId, UnwrapLite, }; -use drv_cpu_seq_api::{PowerState, SeqError}; +use drv_cpu_seq_api::{PowerState, SeqError, StateChangeReason}; use drv_hf_api as hf_api; use drv_i2c_api as i2c; use drv_ice40_spi_program as ice40; @@ -90,7 +90,13 @@ enum Trace { RailsOn, UartEnabled, A0(u16), - SetState(PowerState, PowerState, u64), + SetState { + prev: PowerState, + next: PowerState, + #[count(children)] + why: StateChangeReason, + now: u64, + }, UpdateState(#[count(children)] PowerState), ClockConfigWrite, ClockConfigSuccess, @@ -482,7 +488,10 @@ impl ServerImpl { // Power on, unless suppressed by the `stay-in-a2` feature if !cfg!(feature = "stay-in-a2") { - _ = server.set_state_internal(PowerState::A0); + _ = server.set_state_internal( + PowerState::A0, + StateChangeReason::InitialPowerOn, + ); } // @@ -666,11 +675,17 @@ impl ServerImpl { fn set_state_internal( &mut self, state: PowerState, + why: StateChangeReason, ) -> Result<(), SeqError> { let sys = sys_api::Sys::from(SYS.get_task_id()); let now = sys_get_timer().now; - ringbuf_entry!(Trace::SetState(self.state, state, now)); + ringbuf_entry!(Trace::SetState { + prev: self.state, + next: state, + why, + now + }); ringbuf_entry_v3p3_sys_a0_vout(); @@ -813,6 +828,7 @@ impl ServerImpl { // // Now wait for the end of Group C. + // loop { let mut status = [0u8]; @@ -1033,7 +1049,18 @@ impl idl::InOrderSequencerImpl for ServerImpl { _: &RecvMessage, state: PowerState, ) -> Result<(), RequestError> { - self.set_state_internal(state).map_err(RequestError::from) + self.set_state_internal(state, StateChangeReason::Other) + .map_err(RequestError::from) + } + + fn set_state_with_reason( + &mut self, + _: &RecvMessage, + state: PowerState, + reason: StateChangeReason, + ) -> Result<(), RequestError> { + self.set_state_internal(state, reason) + .map_err(RequestError::from) } fn send_hardware_nmi( @@ -1403,7 +1430,7 @@ cfg_if::cfg_if! { } mod idl { - use super::SeqError; + use super::{SeqError, StateChangeReason}; include!(concat!(env!("OUT_DIR"), "/server_stub.rs")); } diff --git a/drv/grapefruit-seq-server/src/main.rs b/drv/grapefruit-seq-server/src/main.rs index 176ac1570..5787e7cec 100644 --- a/drv/grapefruit-seq-server/src/main.rs +++ b/drv/grapefruit-seq-server/src/main.rs @@ -7,7 +7,7 @@ #![no_std] #![no_main] -use drv_cpu_seq_api::PowerState; +use drv_cpu_seq_api::{PowerState, StateChangeReason}; use drv_spi_api::{SpiDevice, SpiServer}; use drv_stm32xx_sys_api as sys_api; use idol_runtime::{NotificationHandler, RequestError}; @@ -29,6 +29,7 @@ enum Trace { ContinueBitstreamLoad(usize), WaitForDone, Programmed, + #[count(skip)] None, } @@ -268,6 +269,20 @@ impl ServerImpl { fn set_state_impl(&self, state: PowerState) { self.jefe.set_state(state as u32); } + + fn validate_state_change( + &self, + state: PowerState, + ) -> Result<(), drv_cpu_seq_api::SeqError> { + match (self.get_state_impl(), state) { + (PowerState::A2, PowerState::A0) + | (PowerState::A0, PowerState::A2) + | (PowerState::A0PlusHP, PowerState::A2) + | (PowerState::A0Thermtrip, PowerState::A2) => Ok(()), + + _ => Err(drv_cpu_seq_api::SeqError::IllegalTransition), + } + } } // The `Sequencer` implementation for Grapefruit is copied from @@ -286,19 +301,20 @@ impl idl::InOrderSequencerImpl for ServerImpl { _: &RecvMessage, state: PowerState, ) -> Result<(), RequestError> { - match (self.get_state_impl(), state) { - (PowerState::A2, PowerState::A0) - | (PowerState::A0, PowerState::A2) - | (PowerState::A0PlusHP, PowerState::A2) - | (PowerState::A0Thermtrip, PowerState::A2) => { - self.set_state_impl(state); - Ok(()) - } + self.validate_state_change(state)?; + self.set_state_impl(state); + Ok(()) + } - _ => Err(RequestError::Runtime( - drv_cpu_seq_api::SeqError::IllegalTransition, - )), - } + fn set_state_with_reason( + &mut self, + _: &RecvMessage, + state: PowerState, + _: StateChangeReason, + ) -> Result<(), RequestError> { + self.validate_state_change(state)?; + self.set_state_impl(state); + Ok(()) } fn send_hardware_nmi( @@ -327,7 +343,7 @@ impl NotificationHandler for ServerImpl { } mod idl { - use drv_cpu_seq_api::SeqError; + use drv_cpu_seq_api::{SeqError, StateChangeReason}; include!(concat!(env!("OUT_DIR"), "/server_stub.rs")); } diff --git a/drv/mock-gimlet-seq-server/src/main.rs b/drv/mock-gimlet-seq-server/src/main.rs index 64b0733bd..46d9f48c4 100644 --- a/drv/mock-gimlet-seq-server/src/main.rs +++ b/drv/mock-gimlet-seq-server/src/main.rs @@ -7,7 +7,7 @@ #![no_std] #![no_main] -use drv_cpu_seq_api::{PowerState, SeqError}; +use drv_cpu_seq_api::{PowerState, SeqError, StateChangeReason}; use idol_runtime::{NotificationHandler, RequestError}; use task_jefe_api::Jefe; use userlib::{FromPrimitive, RecvMessage, UnwrapLite}; @@ -44,6 +44,17 @@ impl ServerImpl { fn set_state_impl(&self, state: PowerState) { self.jefe.set_state(state as u32); } + + fn validate_state_change(&self, state: PowerState) -> Result<(), SeqError> { + match (self.get_state_impl(), state) { + (PowerState::A2, PowerState::A0) + | (PowerState::A0, PowerState::A2) + | (PowerState::A0PlusHP, PowerState::A2) + | (PowerState::A0Thermtrip, PowerState::A2) => Ok(()), + + _ => Err(SeqError::IllegalTransition), + } + } } impl idl::InOrderSequencerImpl for ServerImpl { @@ -59,17 +70,20 @@ impl idl::InOrderSequencerImpl for ServerImpl { _: &RecvMessage, state: PowerState, ) -> Result<(), RequestError> { - match (self.get_state_impl(), state) { - (PowerState::A2, PowerState::A0) - | (PowerState::A0, PowerState::A2) - | (PowerState::A0PlusHP, PowerState::A2) - | (PowerState::A0Thermtrip, PowerState::A2) => { - self.set_state_impl(state); - Ok(()) - } + self.validate_state_change(state)?; + self.set_state_impl(state); + Ok(()) + } - _ => Err(RequestError::Runtime(SeqError::IllegalTransition)), - } + fn set_state_with_reason( + &mut self, + _: &RecvMessage, + state: PowerState, + _: StateChangeReason, + ) -> Result<(), RequestError> { + self.validate_state_change(state)?; + self.set_state_impl(state); + Ok(()) } fn send_hardware_nmi( @@ -99,7 +113,7 @@ impl NotificationHandler for ServerImpl { } mod idl { - use super::SeqError; + use super::{SeqError, StateChangeReason}; include!(concat!(env!("OUT_DIR"), "/server_stub.rs")); } diff --git a/idl/cpu-seq.idol b/idl/cpu-seq.idol index f345f36ed..12015daa7 100644 --- a/idl/cpu-seq.idol +++ b/idl/cpu-seq.idol @@ -13,12 +13,29 @@ Interface( idempotent: true, ), "set_state": ( + doc: "Set the power state without providing a reason (legacy).", + args: { + "state": ( + type: "drv_cpu_power_state::PowerState", + recv: FromPrimitive("u8"), + ), + }, + reply: Result( + ok: "()", + err: CLike("SeqError"), + ), + ), + "set_state_with_reason": ( doc: "Set the power state", args: { "state": ( type: "drv_cpu_power_state::PowerState", recv: FromPrimitive("u8"), - ) + ), + "reason": ( + type: "StateChangeReason", + recv: FromPrimitive("u8"), + ), }, reply: Result( ok: "()", diff --git a/task/control-plane-agent/src/mgs_compute_sled.rs b/task/control-plane-agent/src/mgs_compute_sled.rs index d191cefcd..54c46855b 100644 --- a/task/control-plane-agent/src/mgs_compute_sled.rs +++ b/task/control-plane-agent/src/mgs_compute_sled.rs @@ -728,7 +728,10 @@ impl SpHandler for MgsHandler { }; self.sequencer - .set_state(power_state) + .set_state_with_reason( + power_state, + drv_cpu_seq_api::StateChangeReason::ControlPlane, + ) .map_err(|e| SpError::PowerStateError(e as u32)) } diff --git a/task/host-sp-comms/src/main.rs b/task/host-sp-comms/src/main.rs index f0d39438c..70c772956 100644 --- a/task/host-sp-comms/src/main.rs +++ b/task/host-sp-comms/src/main.rs @@ -12,7 +12,7 @@ use attest_data::messages::{ HostToRotCommand, RecvSprotError as AttestDataSprotError, RotToHost, MAX_DATA_LEN, }; -use drv_cpu_seq_api::{PowerState, SeqError, Sequencer}; +use drv_cpu_seq_api::{PowerState, SeqError, Sequencer, StateChangeReason}; use drv_hf_api::{HfDevSelect, HfMuxState, HostFlash}; use drv_sprot_api::SpRot; use drv_stm32xx_sys_api as sys_api; @@ -106,6 +106,7 @@ enum Trace { now: u64, #[count(children)] state: PowerState, + why: StateChangeReason, }, HfMux { now: u64, @@ -250,6 +251,12 @@ struct ServerImpl { reboot_state: Option, host_kv_storage: HostKeyValueStorage, hf_mux_state: Option, + /// Set when the host OS fails to boot or panics, and unset when the system + /// reboots. + /// + /// This is used to determine whether a host-triggered power-off is due to a + /// kernel panic, boot failure, or was a normal power-off. + last_power_off: Option, } impl ServerImpl { @@ -317,6 +324,7 @@ impl ServerImpl { dtrace_conf_len: 0, }, hf_mux_state: None, + last_power_off: None, } } @@ -374,23 +382,31 @@ impl ServerImpl { // basically only ever succeed in our initial set_state() request, so I // don't know how we'd test it fn power_off_host(&mut self, reboot: bool) { + let why = match self.last_power_off { + None if reboot => StateChangeReason::HostReboot, + None => StateChangeReason::HostPowerOff, + Some(reason) => reason, + }; loop { // Attempt to move to A2; given we only call this function in // response to a host request, we expect we're currently in A0 and // this should work. - let err = match self.sequencer.set_state(PowerState::A2) { - Ok(()) => { - ringbuf_entry!(Trace::SetState { - now: sys_get_timer().now, - state: PowerState::A2, - }); - if reboot { - self.reboot_state = Some(RebootState::WaitingForA2); + let err = + match self.sequencer.set_state_with_reason(PowerState::A2, why) + { + Ok(()) => { + ringbuf_entry!(Trace::SetState { + now: sys_get_timer().now, + why, + state: PowerState::A2, + }); + if reboot { + self.reboot_state = Some(RebootState::WaitingForA2); + } + return; } - return; - } - Err(err) => err, - }; + Err(err) => err, + }; // The only error we should see from `set_state()` is an illegal // transition, if we're not currently in A0. @@ -463,10 +479,14 @@ impl ServerImpl { // we cannot let the SoC simply reset because the true state // of hidden cores is unknown: explicitly bounce to A2 // as if the host had requested it. + self.last_power_off = Some(StateChangeReason::CpuReset); self.power_off_host(true); } PowerState::A0 | PowerState::A0PlusHP | PowerState::A0Thermtrip => { + // Clear the last power-off, as we have now reached A0; + // subsequent power-offs will set a new reason. + self.last_power_off = None; // TODO should we clear self.reboot_state here? What if we // transitioned from one A0 state to another? For now, leave it // set, and we'll move back to A0 whenever we transition to @@ -795,6 +815,9 @@ impl ServerImpl { Some(response) } HostToSp::HostBootFailure { reason } => { + // Indicate that the host boot failed, so that we can then tell + // sequencer why we are asking it to power off the system. + self.last_power_off = Some(StateChangeReason::HostBootFailure); // TODO forward to MGS // // For now, copy it into a static var we can pull out via @@ -812,6 +835,14 @@ impl ServerImpl { Some(SpToHost::Ack) } HostToSp::HostPanic => { + // Indicate that a subsequent power-off request will be due to a + // panic. Since a host panic message is also sent after a host + // boot failure, only set the last power-off reason if we + // haven't just seen a boot failure. + if self.last_power_off.is_none() { + self.last_power_off = Some(StateChangeReason::HostPanic); + } + // TODO forward to MGS // // For now, copy it into a static var we can pull out via @@ -1354,6 +1385,10 @@ impl NotificationHandler for ServerImpl { handle_reboot_waiting_in_a2_timer( &self.sequencer, &mut self.reboot_state, + // If the last power-off did not set a reason, it must + // be a host-requested reboot. + self.last_power_off + .unwrap_or(StateChangeReason::HostReboot), ); } Timers::TxPeriodicZeroByte => { @@ -1407,6 +1442,7 @@ fn parse_received_message( fn handle_reboot_waiting_in_a2_timer( sequencer: &Sequencer, reboot_state: &mut Option, + why: StateChangeReason, ) { // If we're past the deadline for transitioning to A0, attempt to do so. if let Some(RebootState::WaitingInA2RebootDelay) = reboot_state { @@ -1418,9 +1454,10 @@ fn handle_reboot_waiting_in_a2_timer( // we've done what we can to reboot, so clear out `reboot_state`. ringbuf_entry!(Trace::SetState { now: sys_get_timer().now, + why, state: PowerState::A0, }); - _ = sequencer.set_state(PowerState::A0); + _ = sequencer.set_state_with_reason(PowerState::A0, why); *reboot_state = None; } } diff --git a/task/thermal/src/bsp/gimlet_bcdef.rs b/task/thermal/src/bsp/gimlet_bcdef.rs index 220a2fe2d..ba08f04c4 100644 --- a/task/thermal/src/bsp/gimlet_bcdef.rs +++ b/task/thermal/src/bsp/gimlet_bcdef.rs @@ -12,7 +12,7 @@ use crate::{ i2c_config::{devices, sensors}, }; pub use drv_cpu_seq_api::SeqError; -use drv_cpu_seq_api::{PowerState, Sequencer}; +use drv_cpu_seq_api::{PowerState, Sequencer, StateChangeReason}; use task_sensor_api::SensorId; use task_thermal_api::ThermalProperties; use userlib::{task_slot, units::Celsius, TaskId, UnwrapLite}; @@ -109,7 +109,8 @@ impl Bsp { } pub fn power_down(&self) -> Result<(), SeqError> { - self.seq.set_state(PowerState::A2) + self.seq + .set_state_with_reason(PowerState::A2, StateChangeReason::Overheat) } pub fn power_mode(&self) -> PowerBitmask {