From 69bebe8cedc9990ca80a60fdb068f1ee3f3942dc Mon Sep 17 00:00:00 2001
From: "Cliff L. Biffle"
Date: Tue, 24 Dec 2024 21:40:55 -0700
Subject: [PATCH] kern: rework ARMv8-M context switch, fixing a bug?

I was playing with reducing ARMv8-M context switch time by looking into
the same sort of changes to MPU loading that I recently did on v6/7.

I noticed that we only ever bitwise-OR values into the MAIR registers.
In other words, the first task that gets activated loads its exact
attributes into MAIR for each region; the _second_ task combines its
attributes using bitwise OR; and so on, until the registers contain the
bitwise OR of all regions in all tasks.

(Note that these are the cacheability/shareability attributes, _not_
the access permissions, which are set correctly. The main implication
of this would be accidentally caching device memory or breaking DMA.)

That seemed bad, so I removed it and reworked the MPU loading loop. Now
we build up the MAIR contents in a local before writing them -- no more
read-modify-write.
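For illustration, here is a minimal sketch of the difference; the
attribute bytes and variable names below are invented for the example,
not taken from the kernel sources:

    fn main() {
        // Hypothetical per-region MAIR attribute bytes for two tasks that
        // happen to use the same region index (values made up for this
        // example):
        let task_a_attr: u8 = 0x44; // normal memory, non-cacheable
        let task_b_attr: u8 = 0x00; // device memory: needs these bits clear

        // Old scheme: read-modify-write of the live MAIR0 register with |=,
        // so bits set by earlier tasks can never be cleared.
        let mut mair0_old: u32 = 0;
        mair0_old |= u32::from(task_a_attr); // switch to task A
        mair0_old |= u32::from(task_b_attr); // switch to task B: 0x44 sticks
        assert_eq!(mair0_old & 0xFF, 0x44); // B's device region looks cacheable

        // New scheme: assemble the bytes in a local, then write the whole
        // register, so task B gets exactly its own attributes.
        let mairs_for_b: [u8; 4] = [task_b_attr, 0, 0, 0];
        let mair0_new = u32::from_le_bytes(mairs_for_b);
        assert_eq!(mair0_new & 0xFF, 0x00); // device attribute survives intact
    }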
Halting the processor in task code, I was able to observe a meaningful
distinction in MAIR contents: previously tasks were unable to set
memory as device (which involves fewer 1 bits than not-device), and now
they can.

The fact that this hasn't bitten us speaks to the relative simplicity
of our current ARMv8-M parts!

I don't have detailed measurements of this change, but it knocks 32
bytes off the routine, which is likely a performance win too.
---
 sys/kern/src/arch/arm_m.rs | 68 ++++++++++++++++++++++----------------
 1 file changed, 40 insertions(+), 28 deletions(-)

diff --git a/sys/kern/src/arch/arm_m.rs b/sys/kern/src/arch/arm_m.rs
index 5044c1725..3df5e3f84 100644
--- a/sys/kern/src/arch/arm_m.rs
+++ b/sys/kern/src/arch/arm_m.rs
@@ -494,15 +494,22 @@ pub fn apply_memory_protection(task: &task::Task) {
 }
 
 /// ARMv8-M specific MPU accelerator data.
+///
+/// This is `repr(C)` only to make the field order match the register order in
+/// the hardware, which improves code generation. We do not actually rely on the
+/// in-memory representation of this struct otherwise.
 #[cfg(armv8m)]
 #[derive(Copy, Clone, Debug)]
+#[repr(C)]
 pub struct RegionDescExt {
-    /// This is the contents of the RLAR register, but without the enable bit
-    /// set. We write that first, and then set the enable bit.
-    rlar_disabled: u32,
-    rbar: u32,
-    mair: u32,
+    rbar: u32,
+
+    /// This is the contents of the RLAR register with the enable bit set. We
+    /// can write it with the enable bit set directly, since we disable the MPU
+    /// before doing so.
+    rlar: u32,
+
+    mair: u8,
 }
 
 #[cfg(armv8m)]
@@ -537,27 +544,25 @@ pub const fn compute_region_extension_data(
         // Outer/inner non-cacheable, outer shared.
         (0b01000100, 0b10)
     } else {
-        let rw = (ratts.contains(RegionAttributes::READ) as u32) << 1
-            | (ratts.contains(RegionAttributes::WRITE) as u32);
+        let rw = (ratts.contains(RegionAttributes::READ) as u8) << 1
+            | (ratts.contains(RegionAttributes::WRITE) as u8);
         // write-back transient, not shared
         (0b0100_0100 | rw | rw << 4, 0b00)
     };
 
-    // RLAR = our upper bound; note that enable (bit 0) is not set, because
-    // it's possible to hard-fault midway through region configuration if
-    // address and size are incompatible while the region is enabled.
-    let rlar_disabled = base + size - 32; // upper bound
+    // RLAR = our upper bound. We're going ahead and setting the enable bit
+    // because we expect this to be loaded with the MPU _disabled._ Loading this
+    // with the MPU _enabled_ would involve momentary inconsistency between RLAR
+    // and RBAR, since the two cannot be written simultaneously, and that would
+    // be Bad.
+    let rlar = (base + size - 32) | 1; // upper bound | enable bit
 
     // RBAR = the base
     let rbar = (xn as u32)
         | ap << 1
         | (sh as u32) << 3 // sharability
         | base;
 
-    RegionDescExt {
-        rlar_disabled,
-        rbar,
-        mair,
-    }
+    RegionDescExt { rlar, rbar, mair }
 }
 
 #[cfg(armv8m)]
@@ -571,30 +576,37 @@ pub fn apply_memory_protection(task: &task::Task) {
         disable_mpu(mpu);
     }
 
+    // We'll collect the MAIR register contents here. Indices 0-3 correspond to
+    // MAIR0's bytes (in LE order); 4-7 are MAIR1.
+    let mut mairs = [0; 8];
+
     for (i, region) in task.region_table().iter().enumerate() {
         let rnr = i as u32;
         let ext = &region.arch_data;
 
+        mairs[i] = ext.mair;
+
+        // Set the attridx field of the RLAR to just choose the attributes with
+        // the same index as the region. This lets us treat MAIR as an array
+        // corresponding to the regions.
+        //
+        // We unfortunately can't do this at compile time, because regions can
+        // be shared, and may not be used in the same table position in all
+        // tasks.
+        let rlar = ext.rlar | (i as u32) << 1; // AttrIdx
+
         unsafe {
-            let rlar_disabled = ext.rlar_disabled | (i as u32) << 1; // AttrIdx
             mpu.rnr.write(rnr);
-            mpu.rlar.write(rlar_disabled); // configure but leave disabled
-            if rnr < 4 {
-                let mut mair0 = mpu.mair[0].read();
-                mair0 |= ext.mair << (rnr * 8);
-                mpu.mair[0].write(mair0);
-            } else {
-                let mut mair1 = mpu.mair[1].read();
-                mair1 |= ext.mair << ((rnr - 4) * 8);
-                mpu.mair[1].write(mair1);
-            }
             mpu.rbar.write(ext.rbar);
-            mpu.rlar.write(rlar_disabled | 1); // enable the region
+            mpu.rlar.write(rlar);
         }
     }
 
     unsafe {
+        // Load the MAIR registers.
+        mpu.mair[0].write(u32::from_le_bytes(mairs[..4].try_into().unwrap()));
+        mpu.mair[1].write(u32::from_le_bytes(mairs[4..].try_into().unwrap()));
         enable_mpu(mpu, true);
     }
 }