From 47735cb1d5c5797ef7f6b5e32aa4956c239b5aa8 Mon Sep 17 00:00:00 2001
From: "Cliff L. Biffle"
Date: Wed, 20 Nov 2024 07:49:50 -0800
Subject: [PATCH] Add kipc::find_faulted_task.

This resolves a four-year-old TODO in El Jefe asking for a way to
process faulted tasks without making so many kipcs.

The original supervisor kipc interface was, by definition, designed
before we knew what we were doing. Now that we have some miles on the
system, some things are more clear:

1. The supervisor doesn't use the TaskState data to make its decisions.

2. The TaskState data is pretty expensive to serialize/deserialize, and
   produces code containing panic sites.

3. Panic sites in the supervisor are bad, since it is not allowed to
   panic.

The new find_faulted_task operation can detect all N faulted tasks using
N+1 kipcs, instead of one per potentially faulted task, and the request
and response messages are trivial to serialize (one four-byte integer
each way).

This has allowed me to write (out-of-tree) "minisuper," a supervisor in
256 bytes that cannot panic. In-tree, this has the advantage of knocking
33% off Jefe's flash size and reducing statically-analyzable max stack
depth by 20%.
---
 doc/kipc.adoc           | 56 +++++++++++++++++++++++++++++++++++++++++
 sys/abi/src/lib.rs      |  2 ++
 sys/kern/src/kipc.rs    | 44 ++++++++++++++++++++++++++++++++
 sys/userlib/src/kipc.rs | 16 ++++++++++++
 task/jefe/src/main.rs   | 47 +++++++++++++++++-----------------
 5 files changed, 141 insertions(+), 24 deletions(-)

diff --git a/doc/kipc.adoc b/doc/kipc.adoc
index 764973a7b8..8ff134e291 100644
--- a/doc/kipc.adoc
+++ b/doc/kipc.adoc
@@ -290,6 +290,62 @@ returned by a call to `get_task_dump_region` for the specified task.
 A copy of the memory referred to by the specified region, starting at `base`
 and running for `size` bytes.
 
+=== `find_faulted_task` (9)
+
+Scans forward from a given task index searching for a faulted task. If a faulted
+task is found, returns its index. Otherwise, returns zero, the index of the
+supervisor task, which is by definition not faulted.
+
+==== Request
+
+[source,rust]
+----
+struct FindFaultedTaskRequest {
+    starting_index: u32,
+}
+----
+
+==== Preconditions
+
+The `starting_index` must be a valid index for this system.
+
+==== Response
+
+[source,rust]
+----
+struct FindFaultedTaskResponse {
+    fault_index: u32,
+}
+----
+
+==== Notes
+
+This is intended for the common case of a supervisor scanning the task table in
+search of a fault. Compared to doing this manually with `read_task_status`,
+`find_faulted_task` has several advantages:
+
+1. Generates one kipc per faulted task, plus one, rather than one kipc per
+   potentially faulted task.
+
+2. Avoids serializing/transferring/deserializing the relatively complex
+   `TaskState` type, which supervisors rarely make use of in practice.
+
+3. Can be implemented in a way that doesn't potentially panic the supervisor.
+
+The "task index or zero" return value is represented as an `Option<NonZeroUsize>`
+in the Rust API, so a typical use of this kipc looks like:
+
+[source,rust]
+----
+let mut next_task = 1; // skip supervisor
+while let Some(fault) = kipc::find_faulted_task(next_task) {
+    let fault = usize::from(fault);
+    // do things with the faulted task
+
+    next_task = fault + 1;
+}
+----
+
 == Receiving from the kernel
 
 The kernel never sends messages to tasks. It's simply not equipped to do so.
diff --git a/sys/abi/src/lib.rs b/sys/abi/src/lib.rs
index 4563b4b438..dd00782e43 100644
--- a/sys/abi/src/lib.rs
+++ b/sys/abi/src/lib.rs
@@ -490,6 +490,7 @@ pub enum Kipcnum {
     GetTaskDumpRegion = 6,
     ReadTaskDumpRegion = 7,
     SoftwareIrq = 8,
+    FindFaultedTask = 9,
 }
 
 impl core::convert::TryFrom<u8> for Kipcnum {
@@ -505,6 +506,7 @@ impl core::convert::TryFrom<u8> for Kipcnum {
             6 => Ok(Self::GetTaskDumpRegion),
             7 => Ok(Self::ReadTaskDumpRegion),
             8 => Ok(Self::SoftwareIrq),
+            9 => Ok(Self::FindFaultedTask),
             _ => Err(()),
         }
     }
diff --git a/sys/kern/src/kipc.rs b/sys/kern/src/kipc.rs
index 5e6f6fc501..17bcfc859b 100644
--- a/sys/kern/src/kipc.rs
+++ b/sys/kern/src/kipc.rs
@@ -39,6 +39,9 @@ pub fn handle_kernel_message(
             read_task_dump_region(tasks, caller, args.message?, args.response?)
         }
         Ok(Kipcnum::SoftwareIrq) => software_irq(tasks, caller, args.message?),
+        Ok(Kipcnum::FindFaultedTask) => {
+            find_faulted_task(tasks, caller, args.message?, args.response?)
+        }
 
         _ => {
             // Task has sent an unknown message to the kernel. That's bad.
@@ -465,3 +468,44 @@ fn software_irq(
     tasks[caller].save_mut().set_send_response_and_length(0, 0);
     Ok(NextTask::Same)
 }
+
+fn find_faulted_task(
+    tasks: &mut [Task],
+    caller: usize,
+    message: USlice<u8>,
+    response: USlice<u8>,
+) -> Result<NextTask, UserError> {
+    if caller != 0 {
+        return Err(UserError::Unrecoverable(FaultInfo::SyscallUsage(
+            UsageError::NotSupervisor,
+        )));
+    }
+
+    let index = deserialize_message::<u32>(&tasks[caller], message)? as usize;
+
+    // Note: we explicitly permit index == tasks.len(); the loop below then
+    // runs zero times, ending the search with the "not found" response.
+    if index > tasks.len() {
+        return Err(UserError::Unrecoverable(FaultInfo::SyscallUsage(
+            UsageError::TaskOutOfRange,
+        )));
+    }
+
+    for i in index..tasks.len() {
+        if let TaskState::Faulted { .. } = tasks[i].state() {
+            let response_len =
+                serialize_response(&mut tasks[caller], response, &(i as u32))?;
+            tasks[caller]
+                .save_mut()
+                .set_send_response_and_length(0, response_len);
+            return Ok(NextTask::Same);
+        }
+    }
+
+    let response_len =
+        serialize_response(&mut tasks[caller], response, &0_u32)?;
+    tasks[caller]
+        .save_mut()
+        .set_send_response_and_length(0, response_len);
+    Ok(NextTask::Same)
+}
diff --git a/sys/userlib/src/kipc.rs b/sys/userlib/src/kipc.rs
index b153ca2913..e96d357302 100644
--- a/sys/userlib/src/kipc.rs
+++ b/sys/userlib/src/kipc.rs
@@ -4,6 +4,8 @@
 
 //! Operations implemented by IPC with the kernel task.
 
+use core::num::NonZeroUsize;
+
 use abi::{Kipcnum, TaskId};
 use zerocopy::AsBytes;
 
@@ -24,6 +26,20 @@ pub fn read_task_status(task: usize) -> abi::TaskState {
     ssmarshal::deserialize(&response[..len]).unwrap_lite().0
 }
 
+pub fn find_faulted_task(task: usize) -> Option<NonZeroUsize> {
+    // Coerce `task` to a known size (Rust doesn't assume that usize == u32)
+    let task = task as u32;
+    let mut response = 0_u32;
+    let (_, _) = sys_send(
+        TaskId::KERNEL,
+        Kipcnum::FindFaultedTask as u16,
+        task.as_bytes(),
+        response.as_bytes_mut(),
+        &[],
+    );
+    NonZeroUsize::new(response as usize)
+}
+
 pub fn get_task_dump_region(
     task: usize,
     region: usize,
diff --git a/task/jefe/src/main.rs b/task/jefe/src/main.rs
index f52855b16b..2d48acd1de 100644
--- a/task/jefe/src/main.rs
+++ b/task/jefe/src/main.rs
@@ -307,37 +307,36 @@ impl idol_runtime::NotificationHandler for ServerImpl<'_> {
            // one task to have faulted since we last looked, but it's somewhat
            // unlikely since a fault causes us to immediately preempt. In any
            // case, let's assume we might have to handle multiple tasks.
-            //
-            // TODO: it would be fantastic to have a way of finding this out in
-            // one syscall.
-            for (i, status) in self.task_states.iter_mut().enumerate() {
+            let mut next_task = 1;
+            while let Some(fault_index) = kipc::find_faulted_task(next_task) {
+                let fault_index = usize::from(fault_index);
+                next_task = fault_index + 1;
+
+                let status = &mut self.task_states[fault_index];
+
                 // If we're aware that this task is in a fault state, don't
                 // bother making a syscall to enquire.
                 if status.holding_fault {
                     continue;
                 }
 
-                if let abi::TaskState::Faulted { .. } =
-                    kipc::read_task_status(i)
+                #[cfg(feature = "dump")]
                 {
-                    #[cfg(feature = "dump")]
-                    {
-                        // We'll ignore the result of dumping; it could fail
-                        // if we're out of space, but we don't have a way of
-                        // dealing with that right now.
-                        //
-                        // TODO: some kind of circular buffer?
-                        _ = dump::dump_task(self.dump_areas, i);
-                    }
-
-                    if status.disposition == Disposition::Restart {
-                        // Stand it back up
-                        kipc::restart_task(i, true);
-                    } else {
-                        // Mark this one off so we don't revisit it until
-                        // requested.
-                        status.holding_fault = true;
-                    }
+                    // We'll ignore the result of dumping; it could fail
+                    // if we're out of space, but we don't have a way of
+                    // dealing with that right now.
+                    //
+                    // TODO: some kind of circular buffer?
+                    _ = dump::dump_task(self.dump_areas, fault_index);
+                }
+
+                if status.disposition == Disposition::Restart {
+                    // Stand it back up
+                    kipc::restart_task(fault_index, true);
+                } else {
+                    // Mark this one off so we don't revisit it until
+                    // requested.
+                    status.holding_fault = true;
                 }
             }
         }
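
For illustration: the commit message claims the whole scan costs N+1 kipcs for
N faulted tasks. A minimal supervisor loop in the spirit of the out-of-tree
"minisuper" mentioned above might look like the sketch below. This is
hypothetical code, not minisuper's actual source, and it assumes a policy of
restarting every faulted task unconditionally; `kipc::find_faulted_task` and
`kipc::restart_task` are the real userlib entry points. Note that nothing in
the loop can panic: there is no unwrapping and no `TaskState` deserialization.

[source,rust]
----
use userlib::kipc;

fn scan_for_faults() {
    // Index 0 is the supervisor itself, which by definition is not faulted.
    let mut next_task = 1;
    // Each iteration costs one kipc and yields the next faulted task at or
    // after `next_task`; the final call that returns None is the "+1".
    while let Some(fault_index) = kipc::find_faulted_task(next_task) {
        let fault_index = usize::from(fault_index);
        // Hypothetical policy: always restart (true = start it running).
        kipc::restart_task(fault_index, true);
        next_task = fault_index + 1;
    }
}
----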