Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

PVF worker: random fixes #7649

Merged
merged 5 commits into from
Aug 21, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 51 additions & 13 deletions node/core/pvf/common/src/worker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,22 +121,13 @@ pub fn worker_event_loop<F, Fut>(
"Node and worker version mismatch, node needs restarting, forcing shutdown",
);
kill_parent_node_in_emergency();
let err: io::Result<Never> =
Err(io::Error::new(io::ErrorKind::Unsupported, "Version mismatch"));
gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker({}): {:?}", debug_id, err);
let err = io::Error::new(io::ErrorKind::Unsupported, "Version mismatch");
worker_shutdown_message(debug_id, worker_pid, err);
return
}
}

// Delete all env vars to prevent malicious code from accessing them.
for (key, _) in std::env::vars() {
// TODO: *theoretically* the value (or mere presence) of `RUST_LOG` can be a source of
// randomness for malicious code. In the future we can remove it also and log in the host;
// see <https://github.com/paritytech/polkadot/issues/7117>.
if key != "RUST_LOG" {
std::env::remove_var(key);
}
}
remove_env_vars(debug_id);

// Run the main worker loop.
let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it.");
Expand All @@ -152,14 +143,61 @@ pub fn worker_event_loop<F, Fut>(
// It's never `Ok` because it's `Ok(Never)`.
.unwrap_err();

gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {:?}", debug_id, err);
worker_shutdown_message(debug_id, worker_pid, err);

// We don't want tokio to wait for the tasks to finish. We want to bring down the worker as fast
// as possible and not wait for stalled validation to finish. This isn't strictly necessary now,
// but may be in the future.
rt.shutdown_background();
}

/// Delete all env vars to prevent malicious code from accessing them.
fn remove_env_vars(debug_id: &'static str) {
for (key, value) in std::env::vars_os() {
// TODO: *theoretically* the value (or mere presence) of `RUST_LOG` can be a source of
// randomness for malicious code. In the future we can remove it also and log in the host;
// see <https://github.com/paritytech/polkadot/issues/7117>.
if key == "RUST_LOG" {
continue
}

// In case of a key or value that would cause [`env::remove_var` to
// panic](https://doc.rust-lang.org/std/env/fn.remove_var.html#panics), we first log a
// warning and then proceed to attempt to remove the env var.
let mut err_reasons = vec![];
let (key_str, value_str) = (key.to_str(), value.to_str());
if key.is_empty() {
err_reasons.push("key is empty");
}
if key_str.is_some_and(|s| s.contains('=')) {
err_reasons.push("key contains '='");
}
if key_str.is_some_and(|s| s.contains('\0')) {
err_reasons.push("key contains null character");
}
if value_str.is_some_and(|s| s.contains('\0')) {
err_reasons.push("value contains null character");
}
if !err_reasons.is_empty() {
gum::warn!(
target: LOG_TARGET,
%debug_id,
?key,
?value,
"Attempting to remove badly-formatted env var, this may cause the PVF worker to crash. Please remove it yourself. Reasons: {:?}",
err_reasons
);
}

std::env::remove_var(key);
}
}

/// Provide a consistent message on worker shutdown.
fn worker_shutdown_message(debug_id: &'static str, worker_pid: u32, err: io::Error) {
gum::debug!(target: LOG_TARGET, %worker_pid, "quitting pvf worker ({}): {:?}", debug_id, err);
}

/// Loop that runs in the CPU time monitor thread on prepare and execute jobs. Continuously wakes up
/// and then either blocks for the remaining CPU time, or returns if we exceed the CPU timeout.
///
Expand Down