diff --git a/bin/veritech/BUCK b/bin/veritech/BUCK index b655d895d5..daddde8a77 100644 --- a/bin/veritech/BUCK +++ b/bin/veritech/BUCK @@ -45,6 +45,8 @@ rust_binary( "dev.decryption.key": "//lib/veritech-server:dev.decryption.key", "lang-js": "//bin/lang-js:bin", "firecracker-setup.sh": "//lib/si-firecracker:firecracker-setup.sh", + "prepare_jailer.sh": "//lib/si-firecracker:prepare_jailer.sh", + "stop.sh": "//lib/si-firecracker:stop.sh", }, visibility = ["PUBLIC"], ) diff --git a/lib/si-firecracker/BUCK b/lib/si-firecracker/BUCK index 0b867c48c4..50d5634711 100644 --- a/lib/si-firecracker/BUCK +++ b/lib/si-firecracker/BUCK @@ -8,11 +8,23 @@ rust_library( "//third-party/rust:thiserror", "//third-party/rust:tokio", ], - srcs = glob(["src/**/*.rs","src/firecracker-setup.sh"]), + srcs = glob(["src/**/*.rs","src/scripts/*"]), ) export_file( name = "firecracker-setup.sh", - src = "src/firecracker-setup.sh", + src = "src/scripts/firecracker-setup.sh", + visibility = ["PUBLIC"], +) + +export_file( + name = "prepare_jailer.sh", + src = "src/scripts/prepare_jailer.sh", + visibility = ["PUBLIC"], +) + +export_file( + name = "stop.sh", + src = "src/scripts/stop.sh", visibility = ["PUBLIC"], ) diff --git a/lib/si-firecracker/src/errors.rs b/lib/si-firecracker/src/errors.rs index 69627160a3..c4919d0acb 100644 --- a/lib/si-firecracker/src/errors.rs +++ b/lib/si-firecracker/src/errors.rs @@ -6,16 +6,19 @@ use thiserror::Error; pub enum FirecrackerJailError { // Failed to clean a jail #[error("Failed to clean a jail: {0}")] - Clean(String), + Clean(#[source] tokio::io::Error), + // A script exited unsuccessfully; holds its captured stderr + #[error("Failed to run a script: {0}")] + Output(String), // Failed to prepare a jail #[error("Failed to prepare a jail: {0}")] - Prepare(String), + Prepare(#[source] tokio::io::Error), // Failed to setup firecracker #[error("Failed to setup firecracker: {0}")] - Setup(String), + Setup(#[from] tokio::io::Error), // Failed to spawn firecracker #[error("Failed to spawn 
firecracker: {0}")] - Spawn(String), + Spawn(#[source] tokio::io::Error), // Failed to terminate firecracker #[error("Failed to terminate firecracker: {0}")] Terminate(#[from] cyclone_core::process::ShutdownError), diff --git a/lib/si-firecracker/src/firecracker.rs b/lib/si-firecracker/src/firecracker.rs index 7d754fde51..3623ca4063 100644 --- a/lib/si-firecracker/src/firecracker.rs +++ b/lib/si-firecracker/src/firecracker.rs @@ -1,14 +1,29 @@ use crate::errors::FirecrackerJailError; use cyclone_core::process; +use std::fs::Permissions; use std::os::unix::fs::PermissionsExt; use std::path::Path; use std::path::PathBuf; use std::result; +use tokio::fs; use tokio::process::Child; use tokio::process::Command; type Result = result::Result; +const FIRECRACKER_PREPARE_PATH: &str = "/firecracker-data/prepare_jailer.sh"; +const FIRECRACKER_SETUP_PATH: &str = "/firecracker-data/firecracker-setup.sh"; +const FIRECRACKER_STOP_PATH: &str = "/firecracker-data/stop.sh"; +const FIRECRACKER_PREPARE_BYTES: &[u8] = include_bytes!("scripts/prepare_jailer.sh"); +const FIRECRACKER_SETUP_BYTES: &[u8] = include_bytes!("scripts/firecracker-setup.sh"); +const FIRECRACKER_STOP_BYTES: &[u8] = include_bytes!("scripts/stop.sh"); + +const FIRECRACKER_SCRIPTS: &[(&str, &[u8])] = &[ + (FIRECRACKER_PREPARE_PATH, FIRECRACKER_PREPARE_BYTES), + (FIRECRACKER_SETUP_PATH, FIRECRACKER_SETUP_BYTES), + (FIRECRACKER_STOP_PATH, FIRECRACKER_STOP_BYTES), +]; + #[derive(Debug)] pub struct FirecrackerJail { jailer: Command, @@ -48,15 +63,14 @@ impl FirecrackerJail { } pub async fn clean(id: u32) -> Result<()> { - let command = String::from("/firecracker-data/stop.sh"); - let output = Command::new(command) + let output = Command::new(FIRECRACKER_STOP_PATH) .arg(id.to_string()) .output() .await - .map_err(|e| FirecrackerJailError::Clean(e.to_string()))?; + .map_err(FirecrackerJailError::Clean)?; if !output.status.success() { - return Err(FirecrackerJailError::Clean( + return Err(FirecrackerJailError::Output( 
String::from_utf8(output.stderr) .unwrap_or_else(|_| "Failed to decode stderr".to_string()), )); @@ -65,15 +79,14 @@ impl FirecrackerJail { } pub async fn prepare(id: u32) -> Result<()> { - let command = String::from("/firecracker-data/prepare_jailer.sh"); - let output = Command::new(command) + let output = Command::new(FIRECRACKER_PREPARE_PATH) .arg(id.to_string()) .output() .await - .map_err(|e| FirecrackerJailError::Prepare(e.to_string()))?; + .map_err(FirecrackerJailError::Prepare)?; if !output.status.success() { - return Err(FirecrackerJailError::Prepare( + return Err(FirecrackerJailError::Output( String::from_utf8(output.stderr) .unwrap_or_else(|_| "Failed to decode stderr".to_string()), )); @@ -82,37 +95,19 @@ impl FirecrackerJail { } pub async fn setup(pool_size: u16) -> Result<()> { - let script_bytes = include_bytes!("firecracker-setup.sh"); - let command = Path::new("/firecracker-data/firecracker-setup.sh"); - - // we need to ensure the file is in the correct location with the correct permissions - std::fs::create_dir_all( - command - .parent() - .expect("This should never happen. Did you remove the path from the string above?"), - ) - .map_err(|e| FirecrackerJailError::Setup(e.to_string()))?; - - std::fs::write(command, script_bytes) - .map_err(|e| FirecrackerJailError::Setup(e.to_string()))?; + Self::create_scripts().await?; - std::fs::set_permissions(command, std::fs::Permissions::from_mode(0o755)) - .map_err(|e| FirecrackerJailError::Setup(e.to_string()))?; - - // Spawn the shell process let output = Command::new("sudo") - .arg(command) + .arg(FIRECRACKER_SETUP_PATH) .arg("-j") .arg(pool_size.to_string()) .arg("-rk") - .spawn() - .map_err(|e| FirecrackerJailError::Setup(e.to_string()))? + .spawn()? 
.wait_with_output() - .await - .map_err(|e| FirecrackerJailError::Setup(e.to_string()))?; + .await?; if !output.status.success() { - return Err(FirecrackerJailError::Setup( + return Err(FirecrackerJailError::Output( String::from_utf8(output.stderr) .unwrap_or_else(|_| "Failed to decode stderr".to_string()), )); @@ -122,11 +117,7 @@ impl FirecrackerJail { } pub async fn spawn(&mut self) -> Result<()> { - self.child = Some( - self.jailer - .spawn() - .map_err(|e| FirecrackerJailError::Spawn(e.to_string()))?, - ); + self.child = Some(self.jailer.spawn().map_err(FirecrackerJailError::Spawn)?); Ok(()) } @@ -139,4 +130,20 @@ impl FirecrackerJail { None => Ok(()), } } + + async fn create_scripts() -> Result<()> { + for (path, bytes) in FIRECRACKER_SCRIPTS { + Self::create_script(Path::new(*path), bytes).await?; + } + Ok(()) + } + + async fn create_script(path: &Path, bytes: &[u8]) -> Result<()> { + if let Some(parent_dir) = path.parent() { + fs::create_dir_all(parent_dir).await? + } + fs::write(&path, bytes).await?; + fs::set_permissions(&path, Permissions::from_mode(0o755)).await?; + Ok(()) + } } diff --git a/lib/si-firecracker/src/firecracker-setup.sh b/lib/si-firecracker/src/scripts/firecracker-setup.sh similarity index 96% rename from lib/si-firecracker/src/firecracker-setup.sh rename to lib/si-firecracker/src/scripts/firecracker-setup.sh index 94ceddca95..51296ec13a 100755 --- a/lib/si-firecracker/src/firecracker-setup.sh +++ b/lib/si-firecracker/src/scripts/firecracker-setup.sh @@ -124,11 +124,6 @@ execute_configuration_management() { # Mount secondary EBS volume at /data for mkdir -p /firecracker-data/output/ && cd /firecracker-data/ - # Helper Scripts - curl https://raw.githubusercontent.com/systeminit/si/${CONFIGURATION_MANAGEMENT_BRANCH:-main}/bin/veritech/scripts/start.sh > ./start.sh - curl https://raw.githubusercontent.com/systeminit/si/${CONFIGURATION_MANAGEMENT_BRANCH:-main}/bin/veritech/scripts/stop.sh > ./stop.sh - curl 
https://raw.githubusercontent.com/systeminit/si/${CONFIGURATION_MANAGEMENT_BRANCH:-main}/bin/veritech/scripts/prepare_jailer.sh > ./prepare_jailer.sh - arch=$(uname -m) # Remainder of the binaries # TODO(scott): perform some kind of check to decide if we should @@ -198,7 +193,7 @@ execute_configuration_management() { # Set up correct permissions for the /firecracker-data/ folder chown -R jailer-shared:jailer-shared /firecracker-data/ - chmod a+x /firecracker-data/*{.sh,firecracker,jailer} + chmod a+x /firecracker-data/{firecracker,jailer} # chmod 400 /firecracker-data/micro-vm-key # Copy bins to /usr/bin/ diff --git a/lib/si-firecracker/src/scripts/prepare_jailer.sh b/lib/si-firecracker/src/scripts/prepare_jailer.sh new file mode 100755 index 0000000000..070b9f8d0f --- /dev/null +++ b/lib/si-firecracker/src/scripts/prepare_jailer.sh @@ -0,0 +1,218 @@ +#!/bin/bash + +set -euo pipefail + +########## ############################# ######### +########## Helper Funcs ######### +########## ############################# ######### + +# retry the passed command up to 30 times, sleeping 1 second between attempts +function retry() { + n=0 + until [ "$n" -ge 30 ] + do + $1 && break + n=$((n+1)) + sleep 1 + done +} + +########## ############################# ######### +########## Vars ######### +########## ############################# ######### + +SB_ID="${1:-0}" # Default to sb_id=0 + +DATA_DIR="/firecracker-data" +JAILER_DIR="/srv/jailer/firecracker" +JAILER_BINARY="/usr/bin/jailer" + +ROOTFS="rootfs.ext4" +KERNEL="image-kernel.bin" +SCRIPTS="scripts" + +RO_DRIVE="$DATA_DIR/$ROOTFS" +KERNEL_IMG="$DATA_DIR/$KERNEL" + +TAP_DEV="fc-${SB_ID}-tap0" +FC_MAC="$(printf '02:FC:00:00:%02X:%02X' $((SB_ID / 256)) $((SB_ID % 256)))" +JAILER_NS="jailer-$SB_ID" + +########## ############################# ######### +########## User Prep ######### +########## ############################# ######### + +# Create a user and group to run the execution via for one micro-vm +function user_prep() { + useradd -M -u 
500$SB_ID $JAILER_NS + usermod -L $JAILER_NS + + # This group was created earlier on the machine provisioning + usermod -a -G jailer-processes $JAILER_NS + usermod -a -G root $JAILER_NS + usermod -a -G kvm $JAILER_NS +} + +if ! id 500$SB_ID >/dev/null 2>&1; then + retry user_prep +fi + +########## ############################# ######### +########## Disk Prep ######### +########## ############################# ######### + +JAIL="$JAILER_DIR/$SB_ID/root" +mkdir -p "$JAIL/" +rm -rf "$JAIL/{dev,run}" + +touch $JAIL/logs +touch $JAIL/metrics +[[ -e $DATA_DIR/$SCRIPTS ]] && cp $DATA_DIR/$SCRIPTS $JAIL + +function kernel_prep() { + cp $KERNEL_IMG "$JAIL/$KERNEL" + # TODO(scott): make this work. First attempt yielded a + # kernel loader InvalidElfMagicNumber error + # OVERLAY="kernel-overlay-$SB_ID" + # OVERLAY_FILE=$JAIL/$OVERLAY + # touch $OVERLAY_FILE + # truncate --size=5368709120 $OVERLAY_FILE + # OVERLAY_LOOP=$(losetup --find --show $OVERLAY_FILE) + # OVERLAY_SZ=$(blockdev --getsz $OVERLAY_LOOP) + # echo "0 $OVERLAY_SZ snapshot /dev/mapper/rootfs $OVERLAY_LOOP P 8" | dmsetup create $OVERLAY + # touch $JAIL/$KERNEL + # mount --bind /dev/mapper/$OVERLAY $JAIL/$KERNEL +} + +function rootfs_prep() { + # Here we create a device-per-jail to act as a unique + # CoW layer. These cannot be shared because we are required + # to bind mount these into the jail dir due to chroot shenanigans. + # Bind mounted permissions propagate, so jails would conflict. + OVERLAY_FILE=$JAIL/$OVERLAY + touch $OVERLAY_FILE + truncate --size=5368709120 $OVERLAY_FILE + OVERLAY_LOOP=$(losetup --find --show $OVERLAY_FILE) + OVERLAY_SZ=$(blockdev --getsz $OVERLAY_LOOP) + echo "0 $OVERLAY_SZ snapshot /dev/mapper/rootfs $OVERLAY_LOOP P 8" | dmsetup create $OVERLAY + touch $JAIL/$ROOTFS + mount --bind /dev/mapper/$OVERLAY $JAIL/$ROOTFS +} + +if ! test -f "$JAIL/$KERNEL"; then + retry kernel_prep +fi + +OVERLAY="rootfs-overlay-$SB_ID" +if ! 
dmsetup info $OVERLAY &> /dev/null; then + retry rootfs_prep +fi + +chown -R jailer-$SB_ID:jailer-$SB_ID $JAIL/ + +########## ############################# ######### +########## Network Prep ######### +########## ############################# ######### + +# Create network namespace for jailer incantation +if ! test -f /run/netns/$JAILER_NS; then + ip netns add $JAILER_NS + + MASK_LONG="255.255.255.252" + MASK_SHORT="/30" + FC_IP="10.0.0.1" # Intentionally hardcoded to make cross-microvm communication + TAP_IP="10.0.0.2" # more difficult & to simplify rootfs creation/configuration + NET_LINK_MAIN_IP="$(printf '100.65.%s.%s' $(((4 * SB_ID + 1) / 256)) $(((4 * SB_ID + 1) % 256)))" + NET_LINK_JAILER_IP="$(printf '100.65.%s.%s' $(((4 * SB_ID + 2) / 256)) $(((4 * SB_ID + 2) % 256)))" + VETH_DEV="veth-jailer$SB_ID" + + # Setup TAP device that uses proxy ARP + ip netns exec $JAILER_NS ip link del "$TAP_DEV" 2> /dev/null || true + ip netns exec $JAILER_NS ip tuntap add dev "$TAP_DEV" mode tap + + # Disable ipv6, enable Proxy ARP + ip netns exec $JAILER_NS sysctl -w net.ipv4.conf.${TAP_DEV}.proxy_arp=1 > /dev/null + ip netns exec $JAILER_NS sysctl -w net.ipv6.conf.${TAP_DEV}.disable_ipv6=1 > /dev/null + + # Add IP to TAP for micro-vm + ip netns exec $JAILER_NS ip addr add "${TAP_IP}${MASK_SHORT}" dev "$TAP_DEV" + ip netns exec $JAILER_NS ip link set dev "$TAP_DEV" up + + # Set up IP link into default namespace for external routing + ip link add veth-main$SB_ID type veth peer name $VETH_DEV + ip link set $VETH_DEV netns $JAILER_NS + ip addr add $NET_LINK_MAIN_IP/30 dev veth-main$SB_ID + ip netns exec $JAILER_NS ip addr add $NET_LINK_JAILER_IP/30 dev $VETH_DEV + + # Bring the veth link up for external routing + ip link set dev veth-main$SB_ID up + ip netns exec $JAILER_NS ip link set dev $VETH_DEV up + ip netns exec $JAILER_NS ip route replace default via $NET_LINK_MAIN_IP + + # NAT within the namespace to route return traffic to TAP device of firecracker process for inbound 
traffic + ip netns exec $JAILER_NS iptables -t nat -A POSTROUTING -o $VETH_DEV -j MASQUERADE + ip netns exec $JAILER_NS iptables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT + ip netns exec $JAILER_NS iptables -A FORWARD -i $TAP_DEV -o $VETH_DEV -j ACCEPT + ip netns exec $JAILER_NS iptables -A OUTPUT -d 169.254.169.254 -j DROP + +fi + +########## ############################# ######### +########## Firecracker Prep ######### +########## ############################# ######### + +{ +cat << EOF +{ + "boot-source": { + "kernel_image_path": "./$KERNEL", + "boot_args": "panic=1 pci=off nomodules reboot=k tsc=reliable quiet i8042.nokbd i8042.noaux 8250.nr_uarts=0 ipv6.disable=1" + }, + "drives": [ + { + "drive_id": "1", + "is_root_device": true, + "is_read_only": false, + "path_on_host": "./rootfs.ext4" + } +EOF + +if [ -e $JAIL/$SCRIPTS ]; then + +cat << EOF + ,{ + "drive_id": "2", + "is_root_device": false, + "is_read_only": true, + "path_on_host": "./scripts" + } +EOF +fi + +cat << EOF + ], + "machine-config": { + "vcpu_count": 4, + "mem_size_mib": 512 + }, + "network-interfaces": [{ + "iface_id": "1", + "guest_mac": "$FC_MAC", + "host_dev_name": "$TAP_DEV" + }], + "vsock":{ + "guest_cid": 3, + "uds_path": "./v.sock" + }, + "logger": { + "level": "Debug", + "log_path": "./logs", + "show_level": false, + "show_log_origin": false + }, + "metrics": { + "metrics_path": "./metrics" + } +} +EOF +} > $JAIL/firecracker.conf diff --git a/lib/si-firecracker/src/scripts/stop.sh b/lib/si-firecracker/src/scripts/stop.sh new file mode 100755 index 0000000000..d6350fff53 --- /dev/null +++ b/lib/si-firecracker/src/scripts/stop.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -euo pipefail + +SB_ID="${1:-0}" + +# Kill the firecracker process if it exists +pkill -f "firecracker --id $SB_ID" || true + +# Remove directories and files +JAIL="/srv/jailer/firecracker/${SB_ID}/root" +DISK="${JAIL}/rootfs.ext4" +OVERLAY="rootfs-overlay-${SB_ID}" 
+OVERLAY_FILE="${JAIL}/rootfs-overlay-${SB_ID}" + +# Unmount disk if mounted +while mountpoint -q "$DISK"; do + umount -dl "$DISK" +done + +# Remove device mapper overlay +while dmsetup info "$OVERLAY" &> /dev/null; do + dmsetup remove --force --retry "$OVERLAY" +done + +# Detach loop devices related to the specific SB_ID +# Note the ) at the end to ensure we don't match -1 with -10 +if losetup -a | grep "$OVERLAY)" &> /dev/null; then + losetup -d $(losetup -j "$OVERLAY_FILE" -O NAME | sed -n 2p) +fi +rm -rf "/srv/jailer/firecracker/$SB_ID"