From 02e32f06a84dd1143bab2b420139033df11abe90 Mon Sep 17 00:00:00 2001 From: dominic <510002+dmah42@users.noreply.github.com> Date: Sun, 18 Aug 2024 06:22:29 +0100 Subject: [PATCH 1/2] Bring VM service more inline with other auraed services (#530) * Bring VM service more inline with other auraed services Specifically: * add a custom error enum and use it * split the basic implementation from the tonic wrapper * add basic documentation TODO: * validate requests * add tests * rename create to allocate to match auraed API style * locked --- Makefile | 26 +-- api/v0/vms/vms.proto | 6 +- auraed/src/vms/error.rs | 55 ++++++ auraed/src/vms/mod.rs | 4 +- auraed/src/vms/vm_service.rs | 166 +++++++++++++----- .../vms_start_must_start_vm_with_auraed.rs | 4 +- 6 files changed, 194 insertions(+), 67 deletions(-) create mode 100644 auraed/src/vms/error.rs diff --git a/Makefile b/Makefile index c9b767a2f..6e9b17e17 100644 --- a/Makefile +++ b/Makefile @@ -204,35 +204,35 @@ $(1)-lint: $(GEN_RS) $(GEN_TS) .PHONY: $(1)-test $(1)-test: $(GEN_RS) $(GEN_TS) auraed - $(cargo) test -p $(1) + $(cargo) test -p $(1) --locked .PHONY: $(1)-test-all $(1)-test-all: $(GEN_RS) $(GEN_TS) auraed - $(root_cargo) test -p $(1) -- --include-ignored + $(root_cargo) test -p $(1) --locked -- --include-ignored .PHONY: $(1)-test-integration $(1)-test-integration: $(GEN_RS) $(GEN_TS) auraed - $(root_cargo) test -p $(1) --test '*' -- --include-ignored + $(root_cargo) test -p $(1) --locked --test '*' -- --include-ignored .PHONY: $(1)-test-watch $(1)-test-watch: $(GEN_RS) $(GEN_TS) auraed # Use cargo-watch to continuously run a test (e.g. make $(1)-test-watch name=path::to::test) - $(root_cargo) watch -- $(cargo) test -p $(1) $(name) -- --include-ignored --nocapture + $(root_cargo) watch -- $(cargo) test -p $(1) --locked $(name) -- --include-ignored --nocapture .PHONY: $(1)-build $(1)-build: $(GEN_RS) $(GEN_TS) - $(cargo) build -p $(1) + $(cargo) build -p $(1) --locked .PHONY: $(1)-build-release $(1)-build-release: $(GEN_RS) $(GEN_TS) - $(cargo) build -p $(1) --release + $(cargo) build -p $(1) --locked --release .PHONY: $(1)-debug $(1)-debug: $(GEN_RS) $(GEN_TS) $(1)-lint - $(cargo) install --path ./$(1) --debug --force + $(cargo) install --path ./$(1) --debug --force --locked .PHONY: $(1)-release $(1)-release: $(GEN_RS) $(GEN_TS) $(1)-lint $(1)-test ## Lint, test, and install $(1) - $(cargo) install --path ./$(1) --force + $(cargo) install --path ./$(1) --force --locked endef $(foreach p,$(PROGS),$(eval $(call AURAE_template,$(p),$(if $(findstring auraed,$(p)),)))) @@ -257,7 +257,7 @@ endif .PHONY: not-auraed-build not-auraed-build: $(GEN_RS) $(GEN_TS) - $(cargo) build --workspace --exclude auraed + $(cargo) build --workspace --locked --exclude auraed .PHONY: not-auraed-lint not-auraed-lint: $(GEN_RS) $(GEN_TS) @@ -265,11 +265,11 @@ not-auraed-lint: $(GEN_RS) $(GEN_TS) .PHONY: not-auraed-test not-auraed-test: $(GEN_RS) $(GEN_TS) - $(cargo) test --workspace --exclude auraed + $(cargo) test --workspace --locked --exclude auraed .PHONY: not-auraed-test-all not-auraed-test-all: $(GEN_RS) $(GEN_TS) - $(cargo) test --workspace --exclude auraed -- --include-ignored + $(cargo) test --workspace --locked --exclude auraed -- --include-ignored #------------------------------------------------------------------------------# @@ -281,11 +281,11 @@ libs-lint: $(GEN_RS) $(GEN_TS) .PHONY: libs-test libs-test: $(GEN_RS) $(GEN_TS) - $(cargo) test --workspace --exclude auraed --exclude auraescript --exclude aer + $(cargo) test --workspace --locked --exclude auraed --exclude auraescript --exclude aer .PHONY: libs-test-all libs-test-all: $(GEN_RS) $(GEN_TS) - $(cargo) test --workspace --exclude auraed --exclude auraescript --exclude aer -- --include-ignored + $(cargo) test --workspace --locked --exclude auraed --exclude auraescript --exclude aer -- --include-ignored .PHONY: ebpf ebpf: diff --git a/api/v0/vms/vms.proto b/api/v0/vms/vms.proto index 682390837..300fca8f4 100644 --- a/api/v0/vms/vms.proto +++ b/api/v0/vms/vms.proto @@ -36,7 +36,7 @@ option go_package = "github.com/aurae-runtime/ae/client/pkg/api/v0/vms;vmsv0"; service VmService { // Reserve requested system resources for a new VM. - rpc Create(VmServiceCreateRequest) returns (VmServiceCreateResponse) {} + rpc Allocate(VmServiceAllocateRequest) returns (VmServiceAllocateResponse) {} // Free up previously requested resources for an existing VM rpc Free(VmServiceFreeRequest) returns (VmServiceFreeResponse) {} @@ -79,10 +79,10 @@ message VirtualMachineSummary { string auraed_address = 7; } -message VmServiceCreateRequest{ +message VmServiceAllocateRequest{ VirtualMachine machine = 1; } -message VmServiceCreateResponse{ +message VmServiceAllocateResponse{ string vm_id = 1; } diff --git a/auraed/src/vms/error.rs b/auraed/src/vms/error.rs new file mode 100644 index 000000000..1eb6c83b9 --- /dev/null +++ b/auraed/src/vms/error.rs @@ -0,0 +1,55 @@ +/* -------------------------------------------------------------------------- *\ + * | █████╗ ██╗ ██╗██████╗ █████╗ ███████╗ | * + * | ██╔══██╗██║ ██║██╔══██╗██╔══██╗██╔════╝ | * + * | ███████║██║ ██║██████╔╝███████║█████╗ | * + * | ██╔══██║██║ ██║██╔══██╗██╔══██║██╔══╝ | * + * | ██║ ██║╚██████╔╝██║ ██║██║ ██║███████╗ | * + * | ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝ | * + * +--------------------------------------------+ * + * * + * Distributed Systems Runtime * + * -------------------------------------------------------------------------- * + * Copyright 2022 - 2024, the aurae contributors * + * SPDX-License-Identifier: Apache-2.0 * +\* -------------------------------------------------------------------------- */ + +use thiserror::Error; +use tonic::Status; +use tracing::error; + +use super::virtual_machine::VmID; + +pub(crate) type Result = std::result::Result; + +#[derive(Debug, Error)] +pub(crate) enum VmServiceError { + #[error("vm '{id}' could not be allocated: {source}")] + FailedToAllocateError { id: VmID, source: anyhow::Error }, + #[error("vm '{id}' could not be freed: {source}")] + FailedToFreeError { id: VmID, source: anyhow::Error }, + #[error("vm '{id}' could not be started: {source}")] + FailedToStartError { id: VmID, source: anyhow::Error }, + #[error("vm '{id}' could not be stopped: {source}")] + FailedToStopError { id: VmID, source: anyhow::Error }, + #[error("vm config has no machine specified")] + MissingMachineConfig, + #[error("vm '{id}' config has no root drive specified")] + MissingRootDrive { id: VmID }, +} + +impl From for Status { + fn from(err: VmServiceError) -> Self { + let msg = err.to_string(); + error!("{msg}"); + match err { + VmServiceError::FailedToAllocateError { .. } + | VmServiceError::FailedToFreeError { .. } + | VmServiceError::FailedToStartError { .. } + | VmServiceError::FailedToStopError { .. } => Status::internal(msg), + VmServiceError::MissingMachineConfig { .. } + | VmServiceError::MissingRootDrive { .. } => { + Status::failed_precondition(msg) + } + } + } +} diff --git a/auraed/src/vms/mod.rs b/auraed/src/vms/mod.rs index d7c28833e..277d28771 100644 --- a/auraed/src/vms/mod.rs +++ b/auraed/src/vms/mod.rs @@ -13,12 +13,10 @@ * SPDX-License-Identifier: Apache-2.0 * \* -------------------------------------------------------------------------- */ +mod error; mod manager; mod virtual_machine; mod virtual_machines; mod vm_service; pub(crate) use vm_service::VmService; - -// TODO: Custom Errors -// pub mod errors; diff --git a/auraed/src/vms/vm_service.rs b/auraed/src/vms/vm_service.rs index f3680c8a3..4e3c46cf6 100644 --- a/auraed/src/vms/vm_service.rs +++ b/auraed/src/vms/vm_service.rs @@ -14,46 +14,56 @@ \* -------------------------------------------------------------------------- */ use proto::vms::{ - vm_service_server, VirtualMachineSummary, VmServiceCreateRequest, - VmServiceCreateResponse, VmServiceFreeRequest, VmServiceFreeResponse, - VmServiceListResponse, VmServiceStartRequest, VmServiceStartResponse, - VmServiceStopRequest, VmServiceStopResponse, + vm_service_server, VirtualMachineSummary, VmServiceAllocateRequest, + VmServiceAllocateResponse, VmServiceFreeRequest, VmServiceFreeResponse, + VmServiceListRequest, VmServiceListResponse, VmServiceStartRequest, + VmServiceStartResponse, VmServiceStopRequest, VmServiceStopResponse, }; use std::{path::PathBuf, sync::Arc}; use tokio::sync::Mutex; use tonic::{Request, Response, Status}; use super::{ + error::{Result, VmServiceError}, virtual_machine::{MountSpec, VmID, VmSpec}, virtual_machines::VirtualMachines, }; +/// VmService struct manages the lifecycle of virtual machines. #[derive(Debug, Clone)] pub struct VmService { vms: Arc>, + // TODO: ObserveService } impl VmService { + /// Allocates a new instance of VmService. pub fn new() -> Self { Self { vms: Default::default() } } -} -#[tonic::async_trait] -impl vm_service_server::VmService for VmService { - async fn create( + // TODO: validate requestts + /// Allocates a new VM based on the provided request. + /// + /// # Arguments + /// * `request` - A (currently unvalidated) request to allocate a VM + /// + /// # Returns + /// A result containing the VmServiceAllocateResponse or an error. + #[tracing::instrument(skip(self))] + async fn allocate( &self, - request: Request, - ) -> Result, Status> { + request: VmServiceAllocateRequest, + ) -> Result { let mut vms = self.vms.lock().await; - let req = request.into_inner(); - let Some(vm) = req.machine else { - return Err(Status::invalid_argument("No machine config provided")); + let Some(vm) = request.machine else { + return Err(VmServiceError::MissingMachineConfig {}); }; + let id = VmID::new(vm.id); let Some(root_drive) = vm.root_drive else { - return Err(Status::invalid_argument("No root drive provided")); + return Err(VmServiceError::MissingRootDrive { id: id.clone() }); }; let mut mounts = vec![MountSpec { @@ -65,7 +75,6 @@ impl vm_service_server::VmService for VmService { read_only: m.read_only, })); - let id = VmID::new(vm.id); let spec = VmSpec { memory_size: vm.mem_size_mb, vcpu_count: vm.vcpu_count, @@ -75,64 +84,85 @@ impl vm_service_server::VmService for VmService { net: vec![], }; - let vm = vms.create(id, spec).map_err(|e| { - Status::internal(format!("Failed to create VM: {:?}", e)) + let vm = vms.create(id.clone(), spec).map_err(|e| { + VmServiceError::FailedToAllocateError { id, source: e } })?; - Ok(Response::new(VmServiceCreateResponse { vm_id: vm.id.to_string() })) + Ok(VmServiceAllocateResponse { vm_id: vm.id.to_string() }) } + /// Frees a VM + /// + /// # Arguments + /// * `request` - An (unvalidated) request to free a VM + /// + /// # Returns + /// A result containing VmServiceFreeResponse or an error. + #[tracing::instrument(skip(self))] async fn free( &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - let id = VmID::new(req.vm_id); + request: VmServiceFreeRequest, + ) -> Result { + let id = VmID::new(request.vm_id); let mut vms = self.vms.lock().await; - vms.delete(&id).map_err(|e| { - Status::internal(format!("Failed to start VM: {:?}", e)) - })?; + vms.delete(&id) + .map_err(|e| VmServiceError::FailedToFreeError { id, source: e })?; - Ok(Response::new(VmServiceFreeResponse {})) + Ok(VmServiceFreeResponse {}) } + /// Starts a VM + /// + /// # Arguments + /// * `request` - An (unvalidated) request to start a VM + /// + /// # Returns + /// A result containing VmServiceStartResponse or an error. + #[tracing::instrument(skip(self))] async fn start( &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - let id = VmID::new(req.vm_id); + request: VmServiceStartRequest, + ) -> Result { + let id = VmID::new(request.vm_id); let mut vms = self.vms.lock().await; let addr = vms.start(&id).map_err(|e| { - Status::internal(format!("Failed to start VM: {:?}", e)) + VmServiceError::FailedToStartError { id, source: e } })?; - Ok(Response::new(VmServiceStartResponse { auraed_address: addr })) + Ok(VmServiceStartResponse { auraed_address: addr }) } + /// Stops a VM + /// + /// # Arguments + /// * `request` - An (unvalidated) request to stop a VM + /// + /// # Returns + /// A result containing VmServiceStopResponse or an error. + #[tracing::instrument(skip(self))] async fn stop( &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - let id = VmID::new(req.vm_id); + request: VmServiceStopRequest, + ) -> Result { + let id = VmID::new(request.vm_id); let mut vms = self.vms.lock().await; - vms.stop(&id).map_err(|e| { - Status::internal(format!("Failed to stop VM: {:?}", e)) - })?; + vms.stop(&id) + .map_err(|e| VmServiceError::FailedToStopError { id, source: e })?; - Ok(Response::new(VmServiceStopResponse {})) + Ok(VmServiceStopResponse {}) } - async fn list( - &self, - _request: Request, - ) -> Result, Status> { + /// List VMs + /// + /// # Returns + /// A result containing VmServiceListResponse or an error. + #[tracing::instrument(skip(self))] + async fn list(&self) -> Result { let vms = self.vms.lock().await; - Ok(Response::new(VmServiceListResponse { + Ok(VmServiceListResponse { machines: vms .list() .iter() @@ -156,6 +186,50 @@ impl vm_service_server::VmService for VmService { status: m.status.to_string(), }) .collect(), - })) + }) + } +} + +#[tonic::async_trait] +impl vm_service_server::VmService for VmService { + async fn allocate( + &self, + request: Request, + ) -> std::result::Result, Status> { + let req = request.into_inner(); + // TODO: validate the request + Ok(Response::new(self.allocate(req).await?)) + } + + async fn free( + &self, + request: Request, + ) -> std::result::Result, Status> { + let req = request.into_inner(); + // TODO: validate request + Ok(Response::new(self.free(req).await?)) + } + + async fn start( + &self, + request: Request, + ) -> std::result::Result, Status> { + let req = request.into_inner(); + Ok(Response::new(self.start(req).await?)) + } + + async fn stop( + &self, + request: Request, + ) -> std::result::Result, Status> { + let req = request.into_inner(); + Ok(Response::new(self.stop(req).await?)) + } + + async fn list( + &self, + _request: Request, + ) -> std::result::Result, Status> { + Ok(Response::new(self.list().await?)) } } diff --git a/auraed/tests/vms_start_must_start_vm_with_auraed.rs b/auraed/tests/vms_start_must_start_vm_with_auraed.rs index 3e190a081..3c7dabecf 100644 --- a/auraed/tests/vms_start_must_start_vm_with_auraed.rs +++ b/auraed/tests/vms_start_must_start_vm_with_auraed.rs @@ -19,7 +19,7 @@ use client::{Client, ClientError}; use common::remote_auraed_client; use proto::discovery::DiscoverRequest; use proto::vms::{ - RootDrive, VirtualMachine, VmServiceCreateRequest, VmServiceListRequest, + RootDrive, VirtualMachine, VmServiceAllocateRequest, VmServiceListRequest, VmServiceStartRequest, }; @@ -32,7 +32,7 @@ async fn vms_with_auraed() { let client = common::auraed_client().await; let res = retry!( client - .create(VmServiceCreateRequest { + .allocate(VmServiceAllocateRequest { machine: Some(VirtualMachine { id: vm_id.clone(), mem_size_mb: 1024, From fcb6ad6d0cc7acd36341f6503f99f31ea7a53abc Mon Sep 17 00:00:00 2001 From: dominic <510002+dmah42@users.noreply.github.com> Date: Sun, 18 Aug 2024 06:23:24 +0100 Subject: [PATCH 2/2] Introduce network config struct (#529) Only used by pid1 for now, but other things that need to initialize networking may want to use it too. --- auraed/src/init/network/mod.rs | 50 ++++++++++--------- .../system_runtimes/pid1_system_runtime.rs | 15 +++++- auraed/tests/common/mod.rs | 2 + 3 files changed, 43 insertions(+), 24 deletions(-) diff --git a/auraed/src/init/network/mod.rs b/auraed/src/init/network/mod.rs index 494f3d83f..4a5a2ef06 100644 --- a/auraed/src/init/network/mod.rs +++ b/auraed/src/init/network/mod.rs @@ -53,6 +53,13 @@ pub(crate) enum NetworkError { Other(#[from] rtnetlink::Error), } +pub(crate) struct Config { + pub device: String, + pub address: String, + pub gateway: String, + pub subnet: String, +} + pub(crate) struct Network(Handle); impl Network { @@ -62,9 +69,12 @@ impl Network { Ok(Self(handle)) } - pub(crate) async fn init(&self) -> Result<(), NetworkError> { + pub(crate) async fn init( + &self, + config: &Config, + ) -> Result<(), NetworkError> { configure_loopback(&self.0).await?; - configure_nic(&self.0).await?; + configure_nic(&self.0, config).await?; Ok(()) } @@ -125,38 +135,32 @@ async fn configure_loopback(handle: &Handle) -> Result<(), NetworkError> { Ok(()) } -// TODO: design network config struct -async fn configure_nic(handle: &Handle) -> Result<(), NetworkError> { - const DEFAULT_NET_DEV: &str = "eth0"; - const DEFAULT_NET_DEV_IPV6: &str = "fe80::2"; - const DEFAULT_NET_DEV_IPV6_GATEWAY: &str = "fe80::1"; - const DEFAULT_NET_DEV_IPV6_SUBNET: &str = "/64"; - - trace!("configure {DEFAULT_NET_DEV}"); - - let ipv6_addr = - format!("{DEFAULT_NET_DEV_IPV6}{DEFAULT_NET_DEV_IPV6_SUBNET}") - .parse::() - .expect("valid ipv6 address"); +async fn configure_nic( + handle: &Handle, + config: &Config, +) -> Result<(), NetworkError> { + trace!("configure {0}", config.device); - let gateway = DEFAULT_NET_DEV_IPV6_GATEWAY - .to_string() + let ipv6_addr = format!("{0}{1}", config.address, config.subnet) .parse::() - .expect("gateway"); + .expect("valid ipv6 address"); - add_address(handle, DEFAULT_NET_DEV.to_owned(), ipv6_addr).await?; + let gateway = + config.gateway.to_string().parse::().expect("gateway"); - set_link_up(handle, DEFAULT_NET_DEV.to_owned()).await?; + add_address(handle, config.device.clone(), ipv6_addr).await?; + + set_link_up(handle, config.device.clone()).await?; add_route_v6( handle, - DEFAULT_NET_DEV.to_owned(), + config.device.clone(), "::/0".parse::().expect("valid ipv6 address"), gateway, ) .await?; - info!("Successfully configured {DEFAULT_NET_DEV}"); + info!("Successfully configured {0}", config.device); Ok(()) } @@ -371,4 +375,4 @@ async fn dump_addresses( } else { Err(NetworkError::DeviceNotFound { iface: iface.to_string() }) } -} \ No newline at end of file +} diff --git a/auraed/src/init/system_runtimes/pid1_system_runtime.rs b/auraed/src/init/system_runtimes/pid1_system_runtime.rs index 12ae0b459..5221b055c 100644 --- a/auraed/src/init/system_runtimes/pid1_system_runtime.rs +++ b/auraed/src/init/system_runtimes/pid1_system_runtime.rs @@ -94,9 +94,22 @@ impl SystemRuntime for Pid1SystemRuntime { .mount()?; trace!("Configure network"); + + const DEFAULT_NET_DEV: &str = "eth0"; + const DEFAULT_NET_DEV_IPV6: &str = "fe80::2"; + const DEFAULT_NET_DEV_IPV6_GATEWAY: &str = "fe80::1"; + const DEFAULT_NET_DEV_IPV6_SUBNET: &str = "/64"; + // show_dir("/sys/class/net/", false); // Show available network interfaces let network = network::Network::connect()?; - network.init().await?; + network + .init(&network::Config { + device: DEFAULT_NET_DEV.to_owned(), + address: DEFAULT_NET_DEV_IPV6.to_owned(), + gateway: DEFAULT_NET_DEV_IPV6_GATEWAY.to_owned(), + subnet: DEFAULT_NET_DEV_IPV6_SUBNET.to_owned(), + }) + .await?; network.show_network_info().await; // TODO: do we need to create an interface and address for socket_address? diff --git a/auraed/tests/common/mod.rs b/auraed/tests/common/mod.rs index 2fc989043..90fed3550 100644 --- a/auraed/tests/common/mod.rs +++ b/auraed/tests/common/mod.rs @@ -118,6 +118,8 @@ pub async fn auraed_client() -> Client { CLIENT.get_or_init(inner).await.clone() } +// This is only used in ignored tests (for now). +#[allow(dead_code)] pub async fn remote_auraed_client( ip: String, ) -> Result {