From 2082942d287bb8d890c39d624a45a8ca15ddda73 Mon Sep 17 00:00:00 2001 From: Levon Tarver <11586085+internet-diglett@users.noreply.github.com> Date: Wed, 22 May 2024 13:52:44 -0500 Subject: [PATCH] RPW for OPTE v2p Mappings (#5568) TODO --- - [x] Extend db view to include probe v2p mappings - [x] Update sagas to trigger rpw activation instead of directly configuring v2p mappings - [x] Test that the `delete` functionality cleans up v2p mappings Related --- Resolves #5214 Resolves #4259 Resolves #3107 - [x] Depends on https://github.com/oxidecomputer/opte/pull/494 - [x] Depends on https://github.com/oxidecomputer/meta/issues/409 - [x] Depends on https://github.com/oxidecomputer/maghemite/pull/244 --------- Co-authored-by: Levon Tarver --- .github/buildomat/jobs/deploy.sh | 2 +- Cargo.lock | 14 +- Cargo.toml | 4 +- clients/sled-agent-client/src/lib.rs | 3 +- dev-tools/omdb/tests/env.out | 12 + dev-tools/omdb/tests/successes.out | 11 + dev-tools/oxlog/src/bin/oxlog.rs | 2 +- illumos-utils/src/opte/params.rs | 18 +- illumos-utils/src/opte/port_manager.rs | 90 ++++- nexus-config/src/nexus_config.rs | 17 +- nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/schema.rs | 11 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-model/src/v2p_mapping.rs | 16 + nexus/db-queries/src/db/datastore/mod.rs | 1 + .../src/db/datastore/network_interface.rs | 56 +++ .../src/db/datastore/v2p_mapping.rs | 45 +++ nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 21 ++ nexus/src/app/background/instance_watcher.rs | 6 +- nexus/src/app/background/mod.rs | 1 + nexus/src/app/background/v2p_mappings.rs | 165 +++++++++ nexus/src/app/instance.rs | 6 +- nexus/src/app/instance_network.rs | 328 ++---------------- nexus/src/app/mod.rs | 7 + nexus/src/app/sagas/instance_create.rs | 4 +- nexus/src/app/sagas/instance_delete.rs | 2 + nexus/src/app/sagas/instance_start.rs | 40 +-- nexus/tests/config.test.toml | 1 + nexus/tests/integration_tests/instances.rs | 100 +++--- 
openapi/sled-agent.json | 129 +++---- .../tests/output/self-stat-schema.json | 4 +- package-manifest.toml | 10 +- .../crdb/add-view-for-v2p-mappings/up01.sql | 41 +++ .../crdb/add-view-for-v2p-mappings/up02.sql | 3 + .../crdb/add-view-for-v2p-mappings/up03.sql | 2 + .../crdb/add-view-for-v2p-mappings/up04.sql | 2 + .../crdb/add-view-for-v2p-mappings/up05.sql | 4 + .../crdb/add-view-for-v2p-mappings/up06.sql | 2 + .../crdb/add-view-for-v2p-mappings/up07.sql | 2 + schema/crdb/dbinit.sql | 69 +++- sled-agent/src/http_entrypoints.rs | 38 +- sled-agent/src/sim/http_entrypoints.rs | 41 ++- sled-agent/src/sim/sled_agent.rs | 37 +- sled-agent/src/sled_agent.rs | 14 +- smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + tools/maghemite_mg_openapi_version | 4 +- tools/opte_version | 2 +- 49 files changed, 805 insertions(+), 590 deletions(-) create mode 100644 nexus/db-model/src/v2p_mapping.rs create mode 100644 nexus/db-queries/src/db/datastore/v2p_mapping.rs create mode 100644 nexus/src/app/background/v2p_mappings.rs create mode 100644 schema/crdb/add-view-for-v2p-mappings/up01.sql create mode 100644 schema/crdb/add-view-for-v2p-mappings/up02.sql create mode 100644 schema/crdb/add-view-for-v2p-mappings/up03.sql create mode 100644 schema/crdb/add-view-for-v2p-mappings/up04.sql create mode 100644 schema/crdb/add-view-for-v2p-mappings/up05.sql create mode 100644 schema/crdb/add-view-for-v2p-mappings/up06.sql create mode 100644 schema/crdb/add-view-for-v2p-mappings/up07.sql diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index c947a05e10..31733f0dc0 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -2,7 +2,7 @@ #: #: name = "helios / deploy" #: variety = "basic" -#: target = "lab-2.0-opte-0.28" +#: target = "lab-2.0-opte-0.29" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", #: "%/zone/oxz_*/root/var/svc/log/oxide-*.log*", diff --git 
a/Cargo.lock b/Cargo.lock index 5cf0c5aa73..0d534a3c2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1732,7 +1732,7 @@ dependencies = [ [[package]] name = "derror-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7ee353a470ea59529ee1b34729681da887aa88ce#7ee353a470ea59529ee1b34729681da887aa88ce" +source = "git+https://github.com/oxidecomputer/opte?rev=4cc823b50d3e4a629cdfaab2b3d3382514174ba9#4cc823b50d3e4a629cdfaab2b3d3382514174ba9" dependencies = [ "darling", "proc-macro2", @@ -3481,7 +3481,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7ee353a470ea59529ee1b34729681da887aa88ce#7ee353a470ea59529ee1b34729681da887aa88ce" +source = "git+https://github.com/oxidecomputer/opte?rev=4cc823b50d3e4a629cdfaab2b3d3382514174ba9#4cc823b50d3e4a629cdfaab2b3d3382514174ba9" [[package]] name = "illumos-utils" @@ -3894,7 +3894,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7ee353a470ea59529ee1b34729681da887aa88ce#7ee353a470ea59529ee1b34729681da887aa88ce" +source = "git+https://github.com/oxidecomputer/opte?rev=4cc823b50d3e4a629cdfaab2b3d3382514174ba9#4cc823b50d3e4a629cdfaab2b3d3382514174ba9" dependencies = [ "quote", "syn 2.0.64", @@ -6019,7 +6019,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7ee353a470ea59529ee1b34729681da887aa88ce#7ee353a470ea59529ee1b34729681da887aa88ce" +source = "git+https://github.com/oxidecomputer/opte?rev=4cc823b50d3e4a629cdfaab2b3d3382514174ba9#4cc823b50d3e4a629cdfaab2b3d3382514174ba9" dependencies = [ "cfg-if", "derror-macro", @@ -6037,7 +6037,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7ee353a470ea59529ee1b34729681da887aa88ce#7ee353a470ea59529ee1b34729681da887aa88ce" +source = 
"git+https://github.com/oxidecomputer/opte?rev=4cc823b50d3e4a629cdfaab2b3d3382514174ba9#4cc823b50d3e4a629cdfaab2b3d3382514174ba9" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -6049,7 +6049,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7ee353a470ea59529ee1b34729681da887aa88ce#7ee353a470ea59529ee1b34729681da887aa88ce" +source = "git+https://github.com/oxidecomputer/opte?rev=4cc823b50d3e4a629cdfaab2b3d3382514174ba9#4cc823b50d3e4a629cdfaab2b3d3382514174ba9" dependencies = [ "libc", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys)", @@ -6123,7 +6123,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7ee353a470ea59529ee1b34729681da887aa88ce#7ee353a470ea59529ee1b34729681da887aa88ce" +source = "git+https://github.com/oxidecomputer/opte?rev=4cc823b50d3e4a629cdfaab2b3d3382514174ba9#4cc823b50d3e4a629cdfaab2b3d3382514174ba9" dependencies = [ "cfg-if", "illumos-sys-hdrs", diff --git a/Cargo.toml b/Cargo.toml index 29e2a8cbd0..16207d2f31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -347,14 +347,14 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.11.0" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "7ee353a470ea59529ee1b34729681da887aa88ce", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "4cc823b50d3e4a629cdfaab2b3d3382514174ba9", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" # must match samael's crate! 
openssl = "0.10" openssl-sys = "0.9" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "7ee353a470ea59529ee1b34729681da887aa88ce" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "4cc823b50d3e4a629cdfaab2b3d3382514174ba9" } oso = "0.27" owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index a0145af910..4ac7eed27c 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -35,7 +35,8 @@ progenitor::generate_api!( PortConfigV1 = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, RouteConfig = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, IpNet = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, - OmicronPhysicalDiskConfig = { derives = [Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord] } + VirtualNetworkInterfaceHost = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, + OmicronPhysicalDiskConfig = { derives = [Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord] }, }, //TODO trade the manual transformations later in this file for the // replace directives below? diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 5716510602..d187c47d18 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -114,6 +114,10 @@ task: "switch_port_config_manager" manages switch port settings for rack switches +task: "v2p_manager" + manages opte v2p mappings for vpc networking + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT @@ -225,6 +229,10 @@ task: "switch_port_config_manager" manages switch port settings for rack switches +task: "v2p_manager" + manages opte v2p mappings for vpc networking + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. 
@@ -323,6 +331,10 @@ task: "switch_port_config_manager" manages switch port settings for rack switches +task: "v2p_manager" + manages opte v2p mappings for vpc networking + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index c4c28460b8..db6e5fde87 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -291,6 +291,10 @@ task: "switch_port_config_manager" manages switch port settings for rack switches +task: "v2p_manager" + manages opte v2p mappings for vpc networking + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ @@ -471,6 +475,13 @@ task: "switch_port_config_manager" started at (s ago) and ran for ms warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {}) +task: "v2p_manager" + configured period: every 30s + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms +warning: unknown background task: "v2p_manager" (don't know how to interpret details: Object {}) + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ diff --git a/dev-tools/oxlog/src/bin/oxlog.rs b/dev-tools/oxlog/src/bin/oxlog.rs index ceeb98b3bd..ed1c1a1fc8 100644 --- a/dev-tools/oxlog/src/bin/oxlog.rs +++ b/dev-tools/oxlog/src/bin/oxlog.rs @@ -47,7 +47,7 @@ struct FilterArgs { #[arg(short, long)] archived: bool, - // Print only the extra log files + /// Print only the extra log files #[arg(short, long)] extra: bool, diff --git a/illumos-utils/src/opte/params.rs b/illumos-utils/src/opte/params.rs index df1f33cb92..17c61d680f 100644 --- a/illumos-utils/src/opte/params.rs +++ b/illumos-utils/src/opte/params.rs @@ -31,26 +31,16 @@ pub struct VpcFirewallRule { } /// A mapping from a 
virtual NIC to a physical host -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] -pub struct SetVirtualNetworkInterfaceHost { +#[derive( + Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct VirtualNetworkInterfaceHost { pub virtual_ip: IpAddr, pub virtual_mac: external::MacAddr, pub physical_host_ip: Ipv6Addr, pub vni: external::Vni, } -/// The data needed to identify a virtual IP for which a sled maintains an OPTE -/// virtual-to-physical mapping such that that mapping can be deleted. -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] -pub struct DeleteVirtualNetworkInterfaceHost { - /// The virtual IP whose mapping should be deleted. - pub virtual_ip: IpAddr, - - /// The VNI for the network containing the virtual IP whose mapping should - /// be deleted. - pub vni: external::Vni, -} - /// DHCP configuration for a port /// /// Not present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 03c51c321d..726aa01a2a 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -5,8 +5,7 @@ //! 
Manager for all OPTE ports on a Helios system use crate::opte::opte_firewall_rules; -use crate::opte::params::DeleteVirtualNetworkInterfaceHost; -use crate::opte::params::SetVirtualNetworkInterfaceHost; +use crate::opte::params::VirtualNetworkInterfaceHost; use crate::opte::params::VpcFirewallRule; use crate::opte::Error; use crate::opte::Gateway; @@ -570,10 +569,62 @@ impl PortManager { Ok(()) } + #[cfg(target_os = "illumos")] + pub fn list_virtual_nics( + &self, + ) -> Result<Vec<VirtualNetworkInterfaceHost>, Error> { + use macaddr::MacAddr6; + use opte_ioctl::OpteHdl; + + let hdl = OpteHdl::open(OpteHdl::XDE_CTL)?; + let v2p = + hdl.dump_v2p(&oxide_vpc::api::DumpVirt2PhysReq { unused: 99 })?; + let mut mappings: Vec<_> = vec![]; + + for mapping in v2p.mappings { + let vni = mapping + .vni + .as_u32() + .try_into() + .expect("opte VNI should be 24 bits"); + + for entry in mapping.ip4 { + mappings.push(VirtualNetworkInterfaceHost { + virtual_ip: IpAddr::V4(entry.0.into()), + virtual_mac: MacAddr6::from(entry.1.ether.bytes()).into(), + physical_host_ip: entry.1.ip.into(), + vni, + }); + } + + for entry in mapping.ip6 { + mappings.push(VirtualNetworkInterfaceHost { + virtual_ip: IpAddr::V6(entry.0.into()), + virtual_mac: MacAddr6::from(entry.1.ether.bytes()).into(), + physical_host_ip: entry.1.ip.into(), + vni, + }); + } + } + + Ok(mappings) + } + + #[cfg(not(target_os = "illumos"))] + pub fn list_virtual_nics( + &self, + ) -> Result<Vec<VirtualNetworkInterfaceHost>, Error> { + info!( + self.inner.log, + "Listing virtual nics (ignored)"; + ); + Ok(vec![]) + } + #[cfg(target_os = "illumos")] pub fn set_virtual_nic_host( &self, - mapping: &SetVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { use opte_ioctl::OpteHdl; @@ -600,7 +651,7 @@ impl PortManager { #[cfg(not(target_os = "illumos"))] pub fn set_virtual_nic_host( &self, - mapping: &SetVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { info!( self.inner.log, @@ -613,20 +664,41 @@ impl
PortManager { #[cfg(target_os = "illumos")] pub fn unset_virtual_nic_host( &self, - _mapping: &DeleteVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { - // TODO requires https://github.com/oxidecomputer/opte/issues/332 + use opte_ioctl::OpteHdl; + + info!( + self.inner.log, + "Clearing mapping of virtual NIC to physical host"; + "mapping" => ?&mapping, + ); + + let hdl = OpteHdl::open(OpteHdl::XDE_CTL)?; + hdl.clear_v2p(&oxide_vpc::api::ClearVirt2PhysReq { + vip: mapping.virtual_ip.into(), + phys: oxide_vpc::api::PhysNet { + ether: oxide_vpc::api::MacAddr::from( + (*mapping.virtual_mac).into_array(), + ), + ip: mapping.physical_host_ip.into(), + vni: Vni::new(mapping.vni).unwrap(), + }, + })?; - slog::warn!(self.inner.log, "unset_virtual_nic_host unimplmented"); Ok(()) } #[cfg(not(target_os = "illumos"))] pub fn unset_virtual_nic_host( &self, - _mapping: &DeleteVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { - info!(self.inner.log, "Ignoring unset of virtual NIC mapping"); + info!( + self.inner.log, + "Ignoring unset of virtual NIC mapping"; + "mapping" => ?&mapping, + ); Ok(()) } } diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 01f642a36b..08517026ef 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -379,6 +379,8 @@ pub struct BackgroundTaskConfig { pub instance_watcher: InstanceWatcherConfig, /// configuration for service VPC firewall propagation task pub service_firewall_propagation: ServiceFirewallPropagationConfig, + /// configuration for v2p mapping propagation task + pub v2p_mapping_propagation: V2PMappingPropagationConfig, } #[serde_as] @@ -539,6 +541,14 @@ pub struct ServiceFirewallPropagationConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct V2PMappingPropagationConfig { + /// period (in seconds) for periodic 
activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -777,6 +787,7 @@ mod test { region_replacement.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 + v2p_mapping_propagation.period_secs = 30 [default_region_allocation_strategy] type = "random" seed = 0 @@ -911,7 +922,10 @@ mod test { service_firewall_propagation: ServiceFirewallPropagationConfig { period_secs: Duration::from_secs(300), - } + }, + v2p_mapping_propagation: V2PMappingPropagationConfig { + period_secs: Duration::from_secs(30) + }, }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -980,6 +994,7 @@ mod test { region_replacement.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 + v2p_mapping_propagation.period_secs = 30 [default_region_allocation_strategy] type = "random" "##, diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index c7b495b094..205885cfd8 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -55,6 +55,7 @@ mod project; mod semver_version; mod switch_interface; mod switch_port; +mod v2p_mapping; // These actually represent subqueries, not real table. // However, they must be defined in the same crate as our tables // for join-based marker trait generation. @@ -188,6 +189,7 @@ pub use typed_uuid::to_db_typed_uuid; pub use upstairs_repair::*; pub use user_builtin::*; pub use utilization::*; +pub use v2p_mapping::*; pub use virtual_provisioning_collection::*; pub use virtual_provisioning_resource::*; pub use vmm::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 224c461da0..423388de30 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -285,6 +285,17 @@ table! { } } +table! 
{ + v2p_mapping_view (nic_id) { + nic_id -> Uuid, + sled_id -> Uuid, + sled_ip -> Inet, + vni -> Int4, + mac -> Int8, + ip -> Inet, + } +} + table! { bgp_announce_set (id) { id -> Uuid, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index cb229274fe..ed4b762e68 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(63, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(64, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(64, "add-view-for-v2p-mappings"), KnownVersion::new(63, "remove-producer-base-route-column"), KnownVersion::new(62, "allocate-subnet-decommissioned-sleds"), KnownVersion::new(61, "blueprint-add-sled-state"), diff --git a/nexus/db-model/src/v2p_mapping.rs b/nexus/db-model/src/v2p_mapping.rs new file mode 100644 index 0000000000..43831f7503 --- /dev/null +++ b/nexus/db-model/src/v2p_mapping.rs @@ -0,0 +1,16 @@ +use crate::schema::v2p_mapping_view; +use crate::{MacAddr, Vni}; +use ipnetwork::IpNetwork; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive(Queryable, Selectable, Clone, Debug, Serialize, Deserialize)] +#[diesel(table_name = v2p_mapping_view)] +pub struct V2PMappingView { + pub nic_id: Uuid, + pub sled_id: Uuid, + pub sled_ip: IpNetwork, + pub vni: Vni, + pub mac: MacAddr, + pub ip: IpNetwork, +} diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 9f2d2d02db..7c2cf8cf81 100644 
--- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -94,6 +94,7 @@ mod switch_port; pub(crate) mod test_utils; mod update; mod utilization; +mod v2p_mapping; mod virtual_provisioning_collection; mod vmm; mod volume; diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 733e4ef32b..f552e845c6 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -792,6 +792,62 @@ impl DataStore { public_error_from_diesel(e, ErrorHandler::Server) }) } + + /// List all network interfaces associated with all instances, making as + /// many queries as needed to get them all + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. + /// + /// This particular method was added for propagating v2p mappings via RPWs + pub async fn instance_network_interfaces_all_list_batched( + &self, + opctx: &OpContext, + ) -> ListResultVec<InstanceNetworkInterface> { + opctx.check_complex_operations_allowed()?; + + let mut all_interfaces = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self + .instance_network_interfaces_all_list( + opctx, + &p.current_pagparams(), + ) + .await?; + paginator = p + .found_batch(&batch, &|nic: &InstanceNetworkInterface| { + nic.id() + }); + all_interfaces.extend(batch); + } + Ok(all_interfaces) + } + + /// List one page of all network interfaces associated with instances + pub async fn instance_network_interfaces_all_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec<InstanceNetworkInterface> { + use db::schema::instance_network_interface::dsl; + + // See the comment in `service_create_network_interface`.
There's no + // obvious parent for a service network interface (as opposed to + // instance network interfaces, which require ListChildren on the + // instance to list). As a logical proxy, we check for listing children + // of the service IP pool. + let (authz_pool, _pool) = self.ip_pools_service_lookup(opctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz_pool).await?; + + paginated(dsl::instance_network_interface, dsl::id, pagparams) + .filter(dsl::time_deleted.is_null()) + .select(InstanceNetworkInterface::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } #[cfg(test)] diff --git a/nexus/db-queries/src/db/datastore/v2p_mapping.rs b/nexus/db-queries/src/db/datastore/v2p_mapping.rs new file mode 100644 index 0000000000..6c00957e7d --- /dev/null +++ b/nexus/db-queries/src/db/datastore/v2p_mapping.rs @@ -0,0 +1,45 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use super::DataStore; +use crate::context::OpContext; +use crate::db; +use crate::db::datastore::SQL_BATCH_SIZE; +use crate::db::error::{public_error_from_diesel, ErrorHandler}; +use crate::db::model::V2PMappingView; +use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::{QueryDsl, SelectableHelper}; +use omicron_common::api::external::ListResultVec; + +impl DataStore { + pub async fn v2p_mappings( + &self, + opctx: &OpContext, + ) -> ListResultVec<V2PMappingView> { + use db::schema::v2p_mapping_view::dsl; + + opctx.check_complex_operations_allowed()?; + + let mut mappings = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::v2p_mapping_view, + dsl::nic_id, + &p.current_pagparams(), + ) + .select(V2PMappingView::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + paginator = p.found_batch(&batch, &|mapping| mapping.nic_id); + mappings.extend(batch); + } + + Ok(mappings) + } +} diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index d3faf2459c..cba2edb7e6 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -116,6 +116,7 @@ region_replacement.period_secs = 30 # How frequently to query the status of active instances. instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 +v2p_mapping_propagation.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds.
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index d2f940018d..f7b7291c59 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -22,6 +22,7 @@ use super::region_replacement; use super::service_firewall_rules; use super::sync_service_zone_nat::ServiceZoneNatTracker; use super::sync_switch_configuration::SwitchPortSettingsManager; +use super::v2p_mappings::V2PManager; use crate::app::oximeter::PRODUCER_LEASE_DURATION; use crate::app::sagas::SagaRequest; use nexus_config::BackgroundTaskConfig; @@ -90,6 +91,9 @@ pub struct BackgroundTasks { /// task handle for the switch port settings manager pub task_switch_port_settings_manager: common::TaskHandle, + /// task handle for the opte v2p manager + pub task_v2p_manager: common::TaskHandle, + /// task handle for the task that detects if regions need replacement and /// begins the process pub task_region_replacement: common::TaskHandle, @@ -113,6 +117,10 @@ impl BackgroundTasks { nexus_id: Uuid, resolver: internal_dns::resolver::Resolver, saga_request: Sender, + v2p_watcher: ( + tokio::sync::watch::Sender<()>, + tokio::sync::watch::Receiver<()>, + ), producer_registry: &ProducerRegistry, ) -> BackgroundTasks { let mut driver = common::Driver::new(); @@ -332,6 +340,17 @@ impl BackgroundTasks { ) }; + let task_v2p_manager = { + driver.register( + "v2p_manager".to_string(), + String::from("manages opte v2p mappings for vpc networking"), + config.v2p_mapping_propagation.period_secs, + Box::new(V2PManager::new(datastore.clone())), + opctx.child(BTreeMap::new()), + vec![Box::new(v2p_watcher.1)], + ) + }; + // Background task: detect if a region needs replacement and begin the // process let task_region_replacement = { @@ -358,6 +377,7 @@ impl BackgroundTasks { resolver.clone(), producer_registry, instance_watcher::WatcherIdentity { nexus_id, rack_id }, + v2p_watcher.0, ); driver.register( "instance_watcher".to_string(), @@ -401,6 +421,7 @@ impl BackgroundTasks { 
task_blueprint_executor, task_service_zone_nat_tracker, task_switch_port_settings_manager, + task_v2p_manager, task_region_replacement, task_instance_watcher, task_service_firewall_propagation, diff --git a/nexus/src/app/background/instance_watcher.rs b/nexus/src/app/background/instance_watcher.rs index 4cdca3c4b7..d473ea8e99 100644 --- a/nexus/src/app/background/instance_watcher.rs +++ b/nexus/src/app/background/instance_watcher.rs @@ -35,6 +35,7 @@ pub(crate) struct InstanceWatcher { resolver: internal_dns::resolver::Resolver, metrics: Arc<Mutex<metrics::Metrics>>, id: WatcherIdentity, + v2p_notification_tx: tokio::sync::watch::Sender<()>, } const MAX_SLED_AGENTS: NonZeroU32 = unsafe { @@ -48,12 +49,13 @@ impl InstanceWatcher { resolver: internal_dns::resolver::Resolver, producer_registry: &ProducerRegistry, id: WatcherIdentity, + v2p_notification_tx: tokio::sync::watch::Sender<()>, ) -> Self { let metrics = Arc::new(Mutex::new(metrics::Metrics::default())); producer_registry .register_producer(metrics::Producer(metrics.clone())) .unwrap(); - Self { datastore, resolver, metrics, id } + Self { datastore, resolver, metrics, id, v2p_notification_tx } } fn check_instance( @@ -73,6 +75,7 @@ impl InstanceWatcher { .collect(), ); let client = client.clone(); + let v2p_notification_tx = self.v2p_notification_tx.clone(); async move { slog::trace!(opctx.log, "checking on instance..."); @@ -153,6 +156,7 @@ impl InstanceWatcher { &opctx.log, &target.instance_id, &new_runtime_state, + v2p_notification_tx, ) .await .map_err(|e| { diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 512c782b2e..38bde3c048 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -25,5 +25,6 @@ mod service_firewall_rules; mod status; mod sync_service_zone_nat; mod sync_switch_configuration; +mod v2p_mappings; pub use init::BackgroundTasks; diff --git a/nexus/src/app/background/v2p_mappings.rs b/nexus/src/app/background/v2p_mappings.rs new file mode 100644
index 0000000000..a53ac3442f --- /dev/null +++ b/nexus/src/app/background/v2p_mappings.rs @@ -0,0 +1,165 @@ +use std::{collections::HashSet, sync::Arc}; + +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::{Sled, SledState}; +use nexus_db_queries::{context::OpContext, db::DataStore}; +use nexus_networking::sled_client_from_address; +use nexus_types::{ + deployment::SledFilter, external_api::views::SledPolicy, identity::Asset, +}; +use omicron_common::api::external::Vni; +use serde_json::json; +use sled_agent_client::types::VirtualNetworkInterfaceHost; + +use super::common::BackgroundTask; + +pub struct V2PManager { + datastore: Arc<DataStore>, +} + +impl V2PManager { + pub fn new(datastore: Arc<DataStore>) -> Self { + Self { datastore } + } +} + +impl BackgroundTask for V2PManager { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + let log = opctx.log.clone(); + + async move { + // Get the v2p mappings + let v2p_mappings = match self.datastore.v2p_mappings(opctx).await { + Ok(v) => v, + Err(e) => { + let msg = format!("failed to list v2p mappings: {:#}", e); + error!(&log, "{msg}"); + return json!({"error": msg}); + } + }; + + // Get sleds + // we only care about sleds that are active && inservice + let sleds = match self.datastore.sled_list_all_batched(opctx, SledFilter::InService).await + { + Ok(v) => v, + Err(e) => { + let msg = format!("failed to enumerate sleds: {:#}", e); + error!(&log, "{msg}"); + return json!({"error": msg}); + } + } + .into_iter() + .filter(|sled| { + matches!(sled.state(), SledState::Active) + && matches!(sled.policy(), SledPolicy::InService { ..
}) + }); + + // Map sled db records to sled-agent clients + let sled_clients: Vec<(Sled, sled_agent_client::Client)> = sleds + .map(|sled| { + let client = sled_client_from_address( + sled.id(), + sled.address(), + &log, + ); + (sled, client) + }) + .collect(); + + // create a set of updates from the v2p mappings + let desired_v2p: HashSet<_> = v2p_mappings + .into_iter() + .filter_map(|mapping| { + let physical_host_ip = match mapping.sled_ip.ip() { + std::net::IpAddr::V4(v) => { + // sled ip should never be ipv4 + error!( + &log, + "sled ip should be ipv6 but is ipv4: {v}" + ); + return None; + } + std::net::IpAddr::V6(v) => v, + }; + + let vni = mapping.vni.0; + + let mapping = VirtualNetworkInterfaceHost { + virtual_ip: mapping.ip.ip(), + virtual_mac: *mapping.mac, + physical_host_ip, + vni, + }; + Some(mapping) + }) + .collect(); + + for (sled, client) in sled_clients { + // + // Get the current mappings on each sled + // Ignore vopte interfaces that are used for services. Service zones only need + // an opte interface for external communication. For services zones, intra-sled + // communication is facilitated via zone underlay interfaces / addresses, + // not opte interfaces / v2p mappings. + // + let found_v2p: HashSet<VirtualNetworkInterfaceHost> = match client.list_v2p().await { + Ok(v) => v.into_inner(), + Err(e) => { + error!( + &log, + "unable to list opte v2p mappings for sled"; + "sled" => sled.serial_number(), + "error" => ?e + ); + continue; + } + }.into_iter().filter(|vnic| vnic.vni != Vni::SERVICES_VNI).collect(); + + info!(&log, "found opte v2p mappings"; "sled" => sled.serial_number(), "interfaces" => ?found_v2p); + + let v2p_to_add: Vec<_> = desired_v2p.difference(&found_v2p).collect(); + + let v2p_to_del: Vec<_> = found_v2p.difference(&desired_v2p).collect(); + + // + // Generally, we delete stale entries before adding new entries in RPWs to prevent stale entries + // from causing a conflict with an incoming entry.
In the case of opte it doesn't matter which + // order we perform the next two steps in, since conflicting stale entries are overwritten by the + // incoming entries. + // + info!(&log, "v2p mappings to delete"; "sled" => sled.serial_number(), "mappings" => ?v2p_to_del); + for mapping in v2p_to_del { + if let Err(e) = client.del_v2p(&mapping).await { + error!( + &log, + "failed to delete v2p mapping from sled"; + "sled" => sled.serial_number(), + "mapping" => ?mapping, + "error" => ?e, + ); + } + } + + info!(&log, "v2p mappings to add"; "sled" => sled.serial_number(), "mappings" => ?v2p_to_add); + for mapping in v2p_to_add { + if let Err(e) = client.set_v2p(mapping).await { + error!( + &log, + "failed to add v2p mapping to sled"; + "sled" => sled.serial_number(), + "mapping" => ?mapping, + "error" => ?e, + ); + } + } + } + json!({}) + } + .boxed() + } +} diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 50b46c8e8d..63b080b436 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1515,13 +1515,14 @@ impl super::Nexus { new_runtime_state: &nexus::SledInstanceState, ) -> Result<(), Error> { notify_instance_updated( - &self.db_datastore, + &self.datastore(), &self.resolver().await, &self.opctx_alloc, opctx, &self.log, instance_id, new_runtime_state, + self.v2p_notification_tx.clone(), ) .await?; Ok(()) @@ -1965,6 +1966,7 @@ pub(crate) struct InstanceUpdated { /// Invoked by a sled agent to publish an updated runtime state for an /// Instance. 
+#[allow(clippy::too_many_arguments)] // :( pub(crate) async fn notify_instance_updated( datastore: &DataStore, resolver: &internal_dns::resolver::Resolver, @@ -1973,6 +1975,7 @@ pub(crate) async fn notify_instance_updated( log: &slog::Logger, instance_id: &Uuid, new_runtime_state: &nexus::SledInstanceState, + v2p_notification_tx: tokio::sync::watch::Sender<()>, ) -> Result, Error> { let propolis_id = new_runtime_state.propolis_id; @@ -2011,6 +2014,7 @@ pub(crate) async fn notify_instance_updated( &authz_instance, db_instance.runtime(), &new_runtime_state.instance_state, + v2p_notification_tx.clone(), ) .await?; diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index 30bea98cc6..de4de492e0 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -15,24 +15,20 @@ use nexus_db_model::Vni as DbVni; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; -use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::DataStore; -use nexus_types::deployment::SledFilter; -use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::Ipv4Net; use omicron_common::api::external::Ipv6Net; use omicron_common::api::internal::nexus; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::SwitchLocation; -use omicron_common::retry_until_known_result; -use sled_agent_client::types::DeleteVirtualNetworkInterfaceHost; -use sled_agent_client::types::SetVirtualNetworkInterfaceHost; use std::collections::HashSet; use std::str::FromStr; use uuid::Uuid; +use super::background::BackgroundTasks; + impl super::Nexus { /// Returns the set of switches with uplinks configured and boundary /// services enabled. 
@@ -43,41 +39,6 @@ impl super::Nexus { boundary_switches(&self.db_datastore, opctx).await } - /// Ensures that V2P mappings exist that indicate that the instance with ID - /// `instance_id` is resident on the sled with ID `sled_id`. - pub(crate) async fn create_instance_v2p_mappings( - &self, - opctx: &OpContext, - instance_id: Uuid, - sled_id: Uuid, - ) -> Result<(), Error> { - create_instance_v2p_mappings( - &self.db_datastore, - &self.log, - opctx, - &self.opctx_alloc, - instance_id, - sled_id, - ) - .await - } - - /// Ensure that the necessary v2p mappings for an instance are deleted - pub(crate) async fn delete_instance_v2p_mappings( - &self, - opctx: &OpContext, - instance_id: Uuid, - ) -> Result<(), Error> { - delete_instance_v2p_mappings( - &self.db_datastore, - &self.log, - opctx, - &self.opctx_alloc, - instance_id, - ) - .await - } - /// Ensures that the Dendrite configuration for the supplied instance is /// up-to-date. /// @@ -239,6 +200,7 @@ impl super::Nexus { opctx, &self.opctx_alloc, probe_id, + &self.background_tasks, ) .await } @@ -303,6 +265,7 @@ pub(crate) async fn ensure_updated_instance_network_config( authz_instance: &authz::Instance, prev_instance_state: &db::model::InstanceRuntimeState, new_instance_state: &nexus::InstanceRuntimeState, + v2p_notification_tx: tokio::sync::watch::Sender<()>, ) -> Result<(), Error> { let instance_id = authz_instance.id(); @@ -333,6 +296,7 @@ pub(crate) async fn ensure_updated_instance_network_config( opctx, opctx_alloc, authz_instance, + v2p_notification_tx, ) .await?; return Ok(()); @@ -412,15 +376,13 @@ pub(crate) async fn ensure_updated_instance_network_config( Err(e) => return Err(e), }; - create_instance_v2p_mappings( - datastore, - log, - opctx, - opctx_alloc, - instance_id, - new_sled_id, - ) - .await?; + if let Err(e) = v2p_notification_tx.send(()) { + error!( + log, + "error notifying background task of v2p change"; + "error" => ?e + ) + }; let (.., sled) = LookupPath::new(opctx, 
datastore).sled_id(new_sled_id).fetch().await?; @@ -735,20 +697,19 @@ pub(crate) async fn probe_ensure_dpd_config( async fn clear_instance_networking_state( datastore: &DataStore, log: &slog::Logger, - resolver: &internal_dns::resolver::Resolver, opctx: &OpContext, opctx_alloc: &OpContext, authz_instance: &authz::Instance, + v2p_notification_tx: tokio::sync::watch::Sender<()>, ) -> Result<(), Error> { - delete_instance_v2p_mappings( - datastore, - log, - opctx, - opctx_alloc, - authz_instance.id(), - ) - .await?; + if let Err(e) = v2p_notification_tx.send(()) { + error!( + log, + "error notifying background task of v2p change"; + "error" => ?e + ) + }; instance_delete_dpd_config( datastore, @@ -771,253 +732,6 @@ async fn clear_instance_networking_state( .await } -/// Ensures that V2P mappings exist that indicate that the instance with ID -/// `instance_id` is resident on the sled with ID `sled_id`. -pub(crate) async fn create_instance_v2p_mappings( - datastore: &DataStore, - log: &slog::Logger, - opctx: &OpContext, - opctx_alloc: &OpContext, - instance_id: Uuid, - sled_id: Uuid, -) -> Result<(), Error> { - info!(log, "creating V2P mappings for instance"; - "instance_id" => %instance_id, - "sled_id" => %sled_id); - - // For every sled that isn't the sled this instance was allocated to, create - // a virtual to physical mapping for each of this instance's NICs. - // - // For the mappings to be correct, a few invariants must hold: - // - // - mappings must be set whenever an instance's sled changes (eg. 
- // during instance creation, migration, stop + start) - // - // - an instances' sled must not change while its corresponding mappings - // are being created - // - // - the same mapping creation must be broadcast to all sleds - // - // A more targeted approach would be to see what other instances share - // the VPC this instance is in (or more generally, what instances should - // have connectivity to this one), see what sleds those are allocated - // to, and only create V2P mappings for those sleds. - // - // There's additional work with this approach: - // - // - it means that delete calls are required as well as set calls, - // meaning that now the ordering of those matters (this may also - // necessitate a generation number for V2P mappings) - // - // - V2P mappings have to be bidirectional in order for both instances's - // packets to make a round trip. This isn't a problem with the - // broadcast approach because one of the sides will exist already, but - // it is something to orchestrate with a more targeted approach. - // - // TODO-correctness Default firewall rules currently will block - // instances in different VPCs from connecting to each other. If it ever - // stops doing this, the broadcast approach will create V2P mappings - // that shouldn't exist. - let (.., authz_instance) = LookupPath::new(&opctx, &datastore) - .instance_id(instance_id) - .lookup_for(authz::Action::Read) - .await?; - - let instance_nics = datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await?; - - // Look up the supplied sled's physical host IP. - let physical_host_ip = - nexus_networking::sled_lookup(&datastore, &opctx_alloc, sled_id)? - .fetch() - .await? 
- .1 - .ip - .into(); - - let mut last_sled_id: Option = None; - loop { - let pagparams = DataPageParams { - marker: last_sled_id.as_ref(), - direction: dropshot::PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(10).unwrap(), - }; - - let sleds_page = datastore - // XXX: InService might not be exactly correct - .sled_list(&opctx_alloc, &pagparams, SledFilter::InService) - .await?; - let mut join_handles = - Vec::with_capacity(sleds_page.len() * instance_nics.len()); - - for sled in &sleds_page { - // set_v2p not required for sled instance was allocated to, OPTE - // currently does that automatically - // - // TODO(#3107): Remove this when XDE stops creating mappings - // implicitly. - if sled.id() == sled_id { - continue; - } - - for nic in &instance_nics { - let client = nexus_networking::sled_client( - datastore, - opctx_alloc, - sled.id(), - log, - ) - .await?; - let nic_id = nic.id; - let mapping = SetVirtualNetworkInterfaceHost { - virtual_ip: nic.ip, - virtual_mac: nic.mac, - physical_host_ip, - vni: nic.vni, - }; - - let log = log.clone(); - - // This function is idempotent: calling the set_v2p ioctl with - // the same information is a no-op. - join_handles.push(tokio::spawn(futures::future::lazy( - move |_ctx| async move { - retry_until_known_result(&log, || async { - client.set_v2p(&nic_id, &mapping).await - }) - .await - }, - ))); - } - } - - // Concurrently run each future to completion, but return the last - // error seen. - let mut error = None; - for join_handle in join_handles { - let result = join_handle - .await - .map_err(|e| Error::internal_error(&e.to_string()))? 
- .await; - - if result.is_err() { - error!(log, "{:?}", result); - error = Some(result); - } - } - if let Some(e) = error { - return e.map(|_| ()).map_err(|e| e.into()); - } - - if sleds_page.len() < 10 { - break; - } - - if let Some(last) = sleds_page.last() { - last_sled_id = Some(last.id()); - } - } - - Ok(()) -} - -/// Ensure that the necessary v2p mappings for an instance are deleted -pub(crate) async fn delete_instance_v2p_mappings( - datastore: &DataStore, - log: &slog::Logger, - opctx: &OpContext, - opctx_alloc: &OpContext, - instance_id: Uuid, -) -> Result<(), Error> { - // For every sled that isn't the sled this instance was allocated to, delete - // the virtual to physical mapping for each of this instance's NICs. If - // there isn't a V2P mapping, del_v2p should be a no-op. - let (.., authz_instance) = LookupPath::new(&opctx, datastore) - .instance_id(instance_id) - .lookup_for(authz::Action::Read) - .await?; - - let instance_nics = datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await?; - - let mut last_sled_id: Option = None; - - loop { - let pagparams = DataPageParams { - marker: last_sled_id.as_ref(), - direction: dropshot::PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(10).unwrap(), - }; - - let sleds_page = datastore - // XXX: InService might not be exactly correct - .sled_list(&opctx_alloc, &pagparams, SledFilter::InService) - .await?; - let mut join_handles = - Vec::with_capacity(sleds_page.len() * instance_nics.len()); - - for sled in &sleds_page { - for nic in &instance_nics { - let client = nexus_networking::sled_client( - &datastore, - &opctx_alloc, - sled.id(), - &log, - ) - .await?; - let nic_id = nic.id; - let mapping = DeleteVirtualNetworkInterfaceHost { - virtual_ip: nic.ip, - vni: nic.vni, - }; - - let log = log.clone(); - - // This function is idempotent: calling the set_v2p ioctl with - // the same information is a no-op. 
- join_handles.push(tokio::spawn(futures::future::lazy( - move |_ctx| async move { - retry_until_known_result(&log, || async { - client.del_v2p(&nic_id, &mapping).await - }) - .await - }, - ))); - } - } - - // Concurrently run each future to completion, but return the last - // error seen. - let mut error = None; - for join_handle in join_handles { - let result = join_handle - .await - .map_err(|e| Error::internal_error(&e.to_string()))? - .await; - - if result.is_err() { - error!(log, "{:?}", result); - error = Some(result); - } - } - if let Some(e) = error { - return e.map(|_| ()).map_err(|e| e.into()); - } - - if sleds_page.len() < 10 { - break; - } - - if let Some(last) = sleds_page.last() { - last_sled_id = Some(last.id()); - } - } - - Ok(()) -} - /// Attempts to delete all of the Dendrite NAT configuration for the /// instance identified by `authz_instance`. /// @@ -1083,6 +797,7 @@ pub(crate) async fn probe_delete_dpd_config( opctx: &OpContext, opctx_alloc: &OpContext, probe_id: Uuid, + background_tasks: &BackgroundTasks, ) -> Result<(), Error> { info!(log, "deleting probe dpd configuration"; "probe_id" => %probe_id); @@ -1139,6 +854,7 @@ pub(crate) async fn probe_delete_dpd_config( } }; + background_tasks.activate(&background_tasks.task_v2p_manager); // Notify dendrite that there are changes for it to reconcile. // In the event of a failure to notify dendrite, we'll log an error // and rely on dendrite's RPW timer to catch it up. 
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 4b77788c96..3083a8e761 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -200,6 +200,9 @@ pub struct Nexus { /// Default Crucible region allocation strategy default_region_allocation_strategy: RegionAllocationStrategy, + + /// Channel for notifying background task of change to opte v2p state + v2p_notification_tx: tokio::sync::watch::Sender<()>, } impl Nexus { @@ -390,6 +393,8 @@ impl Nexus { Arc::clone(&db_datastore), ); + let v2p_watcher_channel = tokio::sync::watch::channel(()); + let (saga_request, mut saga_request_recv) = SagaRequest::channel(); let background_tasks = background::BackgroundTasks::start( @@ -400,6 +405,7 @@ impl Nexus { config.deployment.id, resolver.clone(), saga_request, + v2p_watcher_channel.clone(), producer_registry, ); @@ -453,6 +459,7 @@ impl Nexus { .pkg .default_region_allocation_strategy .clone(), + v2p_notification_tx: v2p_watcher_channel.0, }; // TODO-cleanup all the extra Arcs here seems wrong diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index a6df7183d1..a6771f65a0 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1287,9 +1287,7 @@ pub mod test { assert!(no_instances_or_disks_on_sled(&sled_agent).await); let v2p_mappings = &*sled_agent.v2p_mappings.lock().await; - for (_nic_id, mappings) in v2p_mappings { - assert!(mappings.is_empty()); - } + assert!(v2p_mappings.is_empty()); } #[nexus_test(server = crate::Server)] diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index d93c1455ad..b6fedc175d 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -102,6 +102,7 @@ async fn sid_delete_network_interfaces( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); + let nexus = osagactx.nexus(); let params = sagactx.saga_params::()?; let 
opctx = crate::context::op_context_for_saga_action( &sagactx, &params.serialized_authn, ); @@ -112,6 +113,7 @@ async fn sid_delete_network_interfaces( .instance_delete_all_network_interfaces(&opctx, &params.authz_instance) .await .map_err(ActionError::action_failed)?; + nexus.background_tasks.activate(&nexus.background_tasks.task_v2p_manager); Ok(()) } diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index b76bc2e37d..e7caedfc9c 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -447,50 +447,18 @@ async fn sis_dpd_ensure_undo( async fn sis_v2p_ensure( sagactx: NexusActionContext, ) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; let osagactx = sagactx.user_data(); - let instance_id = params.db_instance.id(); - - info!(osagactx.log(), "start saga: ensuring v2p mappings are configured"; - "instance_id" => %instance_id); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - &params.serialized_authn, - ); - - let sled_uuid = sagactx.lookup::<Uuid>("sled_id")?; - osagactx - .nexus() - .create_instance_v2p_mappings(&opctx, instance_id, sled_uuid) - .await - .map_err(ActionError::action_failed)?; - + let nexus = osagactx.nexus(); + nexus.background_tasks.activate(&nexus.background_tasks.task_v2p_manager); Ok(()) } async fn sis_v2p_ensure_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { - let params = sagactx.saga_params::()?; let osagactx = sagactx.user_data(); - let instance_id = params.db_instance.id(); - let sled_id = sagactx.lookup::<Uuid>("sled_id")?; - info!(osagactx.log(), "start saga: undoing v2p configuration"; - "instance_id" => %instance_id, - "sled_id" => %sled_id); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - &params.serialized_authn, - ); - - osagactx - .nexus() - .delete_instance_v2p_mappings(&opctx, instance_id) - .await - .map_err(ActionError::action_failed)?; - + let nexus = osagactx.nexus(); +
nexus.background_tasks.activate(&nexus.background_tasks.task_v2p_manager); Ok(()) } diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 25a6d97efc..49a61cfa36 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -111,6 +111,7 @@ switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 +v2p_mapping_propagation.period_secs = 30 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 7ad52b9919..51e2552e85 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -66,6 +66,7 @@ use omicron_nexus::app::MIN_MEMORY_BYTES_PER_INSTANCE; use omicron_nexus::Nexus; use omicron_nexus::TestInterfaces as _; use omicron_sled_agent::sim::SledAgent; +use omicron_test_utils::dev::poll::wait_for_condition; use sled_agent_client::TestInterfaces as _; use std::convert::TryFrom; use std::net::Ipv4Addr; @@ -660,14 +661,6 @@ async fn test_instance_start_creates_networking_state( .await .unwrap(); - let instance_state = datastore - .instance_fetch_with_vmm(&opctx, &authz_instance) - .await - .unwrap(); - - let sled_id = - instance_state.sled_id().expect("running instance should have a sled"); - let guest_nics = datastore .derive_guest_network_interface_info(&opctx, &authz_instance) .await @@ -675,13 +668,7 @@ async fn test_instance_start_creates_networking_state( assert_eq!(guest_nics.len(), 1); for agent in &sled_agents { - // TODO(#3107) Remove this bifurcation when Nexus programs all mappings - // itself. 
- if agent.id != sled_id { - assert_sled_v2p_mappings(agent, &nics[0], guest_nics[0].vni).await; - } else { - assert!(agent.v2p_mappings.lock().await.is_empty()); - } + assert_sled_v2p_mappings(agent, &nics[0], guest_nics[0].vni).await; } } @@ -861,24 +848,7 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { let mut sled_agents = vec![cptestctx.sled_agent.sled_agent.clone()]; sled_agents.extend(other_sleds.iter().map(|tup| tup.1.sled_agent.clone())); for sled_agent in &sled_agents { - // Starting the instance should have programmed V2P mappings to all the - // sleds except the one where the instance is running. - // - // TODO(#3107): In practice, the instance's sled also has V2P mappings, but - // these are established during VMM setup (i.e. as part of creating the - // instance's OPTE ports) instead of being established by explicit calls - // from Nexus. Simulated sled agent handles the latter calls but does - // not currently update any mappings during simulated instance creation, - // so the check below verifies that no mappings exist on the instance's - // own sled instead of checking for a real mapping. Once Nexus programs - // all mappings explicitly (without skipping the instance's current - // sled) this bifurcation should be removed. 
- if sled_agent.id != original_sled_id { - assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) - .await; - } else { - assert!(sled_agent.v2p_mappings.lock().await.is_empty()); - } + assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni).await; } let dst_sled_id = if original_sled_id == cptestctx.sled_agent.sled_agent.id @@ -4545,14 +4515,6 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - let instance_state = datastore - .instance_fetch_with_vmm(&opctx, &authz_instance) - .await - .unwrap(); - - let sled_id = - instance_state.sled_id().expect("running instance should have a sled"); - let guest_nics = datastore .derive_guest_network_interface_info(&opctx, &authz_instance) .await @@ -4565,14 +4527,7 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { sled_agents.push(&cptestctx.sled_agent.sled_agent); for sled_agent in &sled_agents { - // TODO(#3107) Remove this bifurcation when Nexus programs all mappings - // itself. 
- if sled_agent.id != sled_id { - assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) - .await; - } else { - assert!(sled_agent.v2p_mappings.lock().await.is_empty()); - } + assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni).await; } // Delete the instance @@ -4589,8 +4544,21 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { // Validate that every sled no longer has the V2P mapping for this instance for sled_agent in &sled_agents { - let v2p_mappings = sled_agent.v2p_mappings.lock().await; - assert!(v2p_mappings.is_empty()); + let condition = || async { + let v2p_mappings = sled_agent.v2p_mappings.lock().await; + if v2p_mappings.is_empty() { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }; + wait_for_condition( + condition, + &Duration::from_secs(1), + &Duration::from_secs(30), + ) + .await + .expect("v2p mappings should be empty"); } } @@ -4687,14 +4655,28 @@ async fn assert_sled_v2p_mappings( nic: &InstanceNetworkInterface, vni: Vni, ) { - let v2p_mappings = sled_agent.v2p_mappings.lock().await; - assert!(!v2p_mappings.is_empty()); - - let mapping = v2p_mappings.get(&nic.identity.id).unwrap().last().unwrap(); - assert_eq!(mapping.virtual_ip, nic.ip); - assert_eq!(mapping.virtual_mac, nic.mac); - assert_eq!(mapping.physical_host_ip, sled_agent.ip); - assert_eq!(mapping.vni, vni); + let condition = || async { + let v2p_mappings = sled_agent.v2p_mappings.lock().await; + let mapping = v2p_mappings.iter().find(|mapping| { + mapping.virtual_ip == nic.ip + && mapping.virtual_mac == nic.mac + && mapping.physical_host_ip == sled_agent.ip + && mapping.vni == vni + }); + + if mapping.is_some() { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }; + wait_for_condition( + condition, + &Duration::from_secs(1), + &Duration::from_secs(30), + ) + .await + .expect("matching v2p mapping should be present"); } /// Simulate completion of an ongoing instance state transition. 
To do this, we diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 5da2b5c797..7a951a6d15 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -842,26 +842,41 @@ } } }, - "/v2p/{interface_id}": { + "/v2p": { + "get": { + "summary": "List v2p mappings present on sled", + "operationId": "list_v2p", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_VirtualNetworkInterfaceHost", + "type": "array", + "items": { + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, "put": { "summary": "Create a mapping from a virtual NIC to a physical host", "operationId": "set_v2p", - "parameters": [ - { - "in": "path", - "name": "interface_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - } - ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SetVirtualNetworkInterfaceHost" + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" } } }, @@ -882,22 +897,11 @@ "delete": { "summary": "Delete a mapping from a virtual NIC to a physical host", "operationId": "del_v2p", - "parameters": [ - { - "in": "path", - "name": "interface_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - } - ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DeleteVirtualNetworkInterfaceHost" + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" } } }, @@ -2016,29 +2020,6 @@ "target" ] }, - "DeleteVirtualNetworkInterfaceHost": { - "description": "The data needed to identify a virtual IP for which a sled maintains an OPTE virtual-to-physical mapping such that that mapping can be deleted.", - "type": "object", - "properties": { - "virtual_ip": { - "description": "The virtual IP 
whose mapping should be deleted.", - "type": "string", - "format": "ip" - }, - "vni": { - "description": "The VNI for the network containing the virtual IP whose mapping should be deleted.", - "allOf": [ - { - "$ref": "#/components/schemas/Vni" - } - ] - } - }, - "required": [ - "virtual_ip", - "vni" - ] - }, "DhcpConfig": { "description": "DHCP configuration for a port\n\nNot present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we use `InstanceRuntimeState::hostname` for this value.", "type": "object", @@ -4285,32 +4266,6 @@ "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" }, - "SetVirtualNetworkInterfaceHost": { - "description": "A mapping from a virtual NIC to a physical host", - "type": "object", - "properties": { - "physical_host_ip": { - "type": "string", - "format": "ipv6" - }, - "virtual_ip": { - "type": "string", - "format": "ip" - }, - "virtual_mac": { - "$ref": "#/components/schemas/MacAddr" - }, - "vni": { - "$ref": "#/components/schemas/Vni" - } - }, - "required": [ - "physical_host_ip", - "virtual_ip", - "virtual_mac", - "vni" - ] - }, "SledInstanceState": { "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", "type": "object", @@ -4579,6 +4534,32 @@ "version" ] }, + "VirtualNetworkInterfaceHost": { + "description": "A mapping from a virtual NIC to a physical host", + "type": "object", + "properties": { + "physical_host_ip": { + "type": "string", + "format": "ipv6" + }, + "virtual_ip": { + "type": "string", + "format": "ip" + }, + "virtual_mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "physical_host_ip", + "virtual_ip", + "virtual_mac", + "vni" + ] + }, "VmmRuntimeState": { "description": "The dynamic 
runtime properties of an individual VMM process.", "type": "object", diff --git a/oximeter/collector/tests/output/self-stat-schema.json b/oximeter/collector/tests/output/self-stat-schema.json index 111d7c0ed2..286ac63405 100644 --- a/oximeter/collector/tests/output/self-stat-schema.json +++ b/oximeter/collector/tests/output/self-stat-schema.json @@ -39,7 +39,7 @@ } ], "datum_type": "cumulative_u64", - "created": "2024-05-17T01:26:16.797600385Z" + "created": "2024-05-21T18:32:24.199619581Z" }, "oximeter_collector:failed_collections": { "timeseries_name": "oximeter_collector:failed_collections", @@ -86,6 +86,6 @@ } ], "datum_type": "cumulative_u64", - "created": "2024-05-17T01:26:16.798713487Z" + "created": "2024-05-21T18:32:24.200514936Z" } } \ No newline at end of file diff --git a/package-manifest.toml b/package-manifest.toml index 2bfc51d533..7f80dacf7c 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -533,10 +533,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "025389ff39d594bf2b815377e2c1dc4dd23b1f96" +source.commit = "23b0cf439f9f62b9a4933e55cc72bcaddc9596cd" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt -source.sha256 = "f2ee54b6a654daa1c1f817440317e9b11c5ddc71249df261bb5cfa0e6057dc24" +source.sha256 = "1ea0e73e149a68bf91b5ce2e0db2a8a1af50dcdbbf381b672aa9ac7e36a3a181" output.type = "tarball" [package.mg-ddm] @@ -549,10 +549,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "025389ff39d594bf2b815377e2c1dc4dd23b1f96" +source.commit = "23b0cf439f9f62b9a4933e55cc72bcaddc9596cd" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "bb98815f759f38abee9f5aea0978cd33e66e75079cc8c171036be21bf9049c96" +source.sha256 = "3334b0a9d5956e3117a6b493b9a5a31220391fab1ecbfb3a4bd8e94d7030771a" output.type = "zone" output.intermediate_only = true @@ -564,7 +564,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "025389ff39d594bf2b815377e2c1dc4dd23b1f96" +source.commit = "23b0cf439f9f62b9a4933e55cc72bcaddc9596cd" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt source.sha256 = "e0907de39ca9f8ab45d40d361a1dbeed4bd8e9b157f8d3d8fe0a4bc259d933bd" diff --git a/schema/crdb/add-view-for-v2p-mappings/up01.sql b/schema/crdb/add-view-for-v2p-mappings/up01.sql new file mode 100644 index 0000000000..96d5723c00 --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up01.sql @@ -0,0 +1,41 @@ +CREATE VIEW IF NOT EXISTS omicron.public.v2p_mapping_view +AS +WITH VmV2pMappings AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.vmm vmm ON n.parent_id = vmm.instance_id + JOIN omicron.public.sled s ON vmm.sled_id = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'instance' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +), +ProbeV2pMapping AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM 
omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.probe p ON n.parent_id = p.id + JOIN omicron.public.sled s ON p.sled = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'probe' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +) +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM VmV2pMappings +UNION +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM ProbeV2pMapping; diff --git a/schema/crdb/add-view-for-v2p-mappings/up02.sql b/schema/crdb/add-view-for-v2p-mappings/up02.sql new file mode 100644 index 0000000000..5ab1075fbe --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up02.sql @@ -0,0 +1,3 @@ +CREATE INDEX IF NOT EXISTS network_interface_by_parent +ON omicron.public.network_interface (parent_id) +STORING (name, kind, vpc_id, subnet_id, mac, ip, slot); diff --git a/schema/crdb/add-view-for-v2p-mappings/up03.sql b/schema/crdb/add-view-for-v2p-mappings/up03.sql new file mode 100644 index 0000000000..86cef026a1 --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up03.sql @@ -0,0 +1,2 @@ +CREATE INDEX IF NOT EXISTS sled_by_policy_and_state +ON omicron.public.sled (sled_policy, sled_state, id) STORING (ip); diff --git a/schema/crdb/add-view-for-v2p-mappings/up04.sql b/schema/crdb/add-view-for-v2p-mappings/up04.sql new file mode 100644 index 0000000000..809146b809 --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up04.sql @@ -0,0 +1,2 @@ +CREATE INDEX IF NOT EXISTS active_vmm +on omicron.public.vmm (time_deleted, sled_id, instance_id); diff --git a/schema/crdb/add-view-for-v2p-mappings/up05.sql b/schema/crdb/add-view-for-v2p-mappings/up05.sql new file mode 100644 index 0000000000..cdabdc6a96 --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up05.sql @@ -0,0 +1,4 @@ +CREATE INDEX IF NOT EXISTS v2p_mapping_details +ON omicron.public.network_interface ( + time_deleted, kind, subnet_id, vpc_id, parent_id +) 
STORING (mac, ip); diff --git a/schema/crdb/add-view-for-v2p-mappings/up06.sql b/schema/crdb/add-view-for-v2p-mappings/up06.sql new file mode 100644 index 0000000000..afd10ed13f --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up06.sql @@ -0,0 +1,2 @@ +CREATE INDEX IF NOT EXISTS sled_by_policy +ON omicron.public.sled (sled_policy) STORING (ip, sled_state); diff --git a/schema/crdb/add-view-for-v2p-mappings/up07.sql b/schema/crdb/add-view-for-v2p-mappings/up07.sql new file mode 100644 index 0000000000..defe411f96 --- /dev/null +++ b/schema/crdb/add-view-for-v2p-mappings/up07.sql @@ -0,0 +1,2 @@ +CREATE INDEX IF NOT EXISTS vmm_by_instance_id +ON omicron.public.vmm (instance_id) STORING (sled_id); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index cc298e4565..2cf9e1100f 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3799,6 +3799,73 @@ ON omicron.public.switch_port (port_settings_id, port_name) STORING (switch_loca CREATE INDEX IF NOT EXISTS switch_port_name ON omicron.public.switch_port (port_name); +COMMIT; +BEGIN; + +-- view for v2p mapping rpw +CREATE VIEW IF NOT EXISTS omicron.public.v2p_mapping_view +AS +WITH VmV2pMappings AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.vmm vmm ON n.parent_id = vmm.instance_id + JOIN omicron.public.sled s ON vmm.sled_id = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'instance' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +), +ProbeV2pMapping AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.probe p ON n.parent_id = p.id + JOIN 
omicron.public.sled s ON p.sled = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'probe' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +) +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM VmV2pMappings +UNION +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM ProbeV2pMapping; + +CREATE INDEX IF NOT EXISTS network_interface_by_parent +ON omicron.public.network_interface (parent_id) +STORING (name, kind, vpc_id, subnet_id, mac, ip, slot); + +CREATE INDEX IF NOT EXISTS sled_by_policy_and_state +ON omicron.public.sled (sled_policy, sled_state, id) STORING (ip); + +CREATE INDEX IF NOT EXISTS active_vmm +ON omicron.public.vmm (time_deleted, sled_id, instance_id); + +CREATE INDEX IF NOT EXISTS v2p_mapping_details +ON omicron.public.network_interface ( + time_deleted, kind, subnet_id, vpc_id, parent_id +) STORING (mac, ip); + +CREATE INDEX IF NOT EXISTS sled_by_policy +ON omicron.public.sled (sled_policy) STORING (ip, sled_state); + +CREATE INDEX IF NOT EXISTS vmm_by_instance_id +ON omicron.public.vmm (instance_id) STORING (sled_id); + /* * Metadata for the schema itself. This version number isn't great, as there's * nothing to ensure it gets bumped when it should be, but it's a start. 
@@ -3859,7 +3926,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '63.0.0', NULL) + (TRUE, NOW(), NOW(), '64.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 99c7725fe3..c5cd88619f 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -25,9 +25,7 @@ use dropshot::{ HttpResponseUpdatedNoContent, Path, Query, RequestContext, StreamingBody, TypedBody, }; -use illumos_utils::opte::params::{ - DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost, -}; +use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use installinator_common::M2Slot; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::{ @@ -71,6 +69,7 @@ pub fn api() -> SledApiDescription { api.register(zone_bundle_cleanup_context_update)?; api.register(zone_bundle_cleanup)?; api.register(sled_role_get)?; + api.register(list_v2p)?; api.register(set_v2p)?; api.register(del_v2p)?; api.register(timesync_get)?; @@ -652,24 +651,16 @@ async fn vpc_firewall_rules_put( Ok(HttpResponseUpdatedNoContent()) } -/// Path parameters for V2P mapping related requests (sled agent API) -#[allow(dead_code)] -#[derive(Deserialize, JsonSchema)] -struct V2pPathParam { - interface_id: Uuid, -} - /// Create a mapping from a virtual NIC to a physical host // Keep interface_id to maintain parity with the simulated sled agent, which // requires interface_id on the path. #[endpoint { method = PUT, - path = "/v2p/{interface_id}", + path = "/v2p/", }] async fn set_v2p( rqctx: RequestContext, - _path_params: Path, - body: TypedBody, + body: TypedBody, ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); @@ -684,12 +675,11 @@ async fn set_v2p( // requires interface_id on the path. 
#[endpoint { method = DELETE, - path = "/v2p/{interface_id}", + path = "/v2p/", }] async fn del_v2p( rqctx: RequestContext, - _path_params: Path, - body: TypedBody, + body: TypedBody, ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); @@ -699,6 +689,22 @@ async fn del_v2p( Ok(HttpResponseUpdatedNoContent()) } +/// List v2p mappings present on sled +// Used by nexus background task +#[endpoint { + method = GET, + path = "/v2p/", +}] +async fn list_v2p( + rqctx: RequestContext, +) -> Result>, HttpError> { + let sa = rqctx.context(); + + let vnics = sa.list_virtual_nics().await.map_err(Error::from)?; + + Ok(HttpResponseOk(vnics)) +} + #[endpoint { method = GET, path = "/timesync", diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 6cddac6fb8..ae1318a8b1 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -20,8 +20,7 @@ use dropshot::HttpResponseUpdatedNoContent; use dropshot::Path; use dropshot::RequestContext; use dropshot::TypedBody; -use illumos_utils::opte::params::DeleteVirtualNetworkInterfaceHost; -use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost; +use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; @@ -54,6 +53,7 @@ pub fn api() -> SledApiDescription { api.register(vpc_firewall_rules_put)?; api.register(set_v2p)?; api.register(del_v2p)?; + api.register(list_v2p)?; api.register(uplink_ensure)?; api.register(read_network_bootstore_config)?; api.register(write_network_bootstore_config)?; @@ -343,27 +343,19 @@ async fn vpc_firewall_rules_put( Ok(HttpResponseUpdatedNoContent()) } -/// Path parameters for V2P mapping related requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct V2pPathParam { - interface_id: Uuid, -} - /// Create a 
mapping from a virtual NIC to a physical host #[endpoint { method = PUT, - path = "/v2p/{interface_id}", + path = "/v2p/", }] async fn set_v2p( rqctx: RequestContext>, - path_params: Path, - body: TypedBody, + body: TypedBody, ) -> Result { let sa = rqctx.context(); - let interface_id = path_params.into_inner().interface_id; let body_args = body.into_inner(); - sa.set_virtual_nic_host(interface_id, &body_args) + sa.set_virtual_nic_host(&body_args) .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; @@ -373,24 +365,37 @@ async fn set_v2p( /// Delete a mapping from a virtual NIC to a physical host #[endpoint { method = DELETE, - path = "/v2p/{interface_id}", + path = "/v2p/", }] async fn del_v2p( rqctx: RequestContext>, - path_params: Path, - body: TypedBody, + body: TypedBody, ) -> Result { let sa = rqctx.context(); - let interface_id = path_params.into_inner().interface_id; let body_args = body.into_inner(); - sa.unset_virtual_nic_host(interface_id, &body_args) + sa.unset_virtual_nic_host(&body_args) .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; Ok(HttpResponseUpdatedNoContent()) } +/// List v2p mappings present on sled +#[endpoint { + method = GET, + path = "/v2p/", +}] +async fn list_v2p( + rqctx: RequestContext>, +) -> Result>, HttpError> { + let sa = rqctx.context(); + + let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; + + Ok(HttpResponseOk(vnics)) +} + #[endpoint { method = POST, path = "/switch-ports", diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 298a8adc34..d9308bf769 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -26,9 +26,7 @@ use anyhow::bail; use anyhow::Context; use dropshot::{HttpError, HttpServer}; use futures::lock::Mutex; -use illumos_utils::opte::params::{ - DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost, -}; +use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use 
ipnetwork::Ipv6Network; use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, @@ -74,7 +72,7 @@ pub struct SledAgent { nexus_address: SocketAddr, pub nexus_client: Arc, disk_id_to_region_ids: Mutex>>, - pub v2p_mappings: Mutex>>, + pub v2p_mappings: Mutex>, mock_propolis: Mutex>, PropolisClient)>>, /// lists of external IPs assigned to instances @@ -189,7 +187,7 @@ impl SledAgent { nexus_address, nexus_client, disk_id_to_region_ids: Mutex::new(HashMap::new()), - v2p_mappings: Mutex::new(HashMap::new()), + v2p_mappings: Mutex::new(HashSet::new()), external_ips: Mutex::new(HashMap::new()), mock_propolis: Mutex::new(None), config: config.clone(), @@ -672,36 +670,29 @@ impl SledAgent { pub async fn set_virtual_nic_host( &self, - interface_id: Uuid, - mapping: &SetVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { let mut v2p_mappings = self.v2p_mappings.lock().await; - let vec = v2p_mappings.entry(interface_id).or_default(); - vec.push(mapping.clone()); + v2p_mappings.insert(mapping.clone()); Ok(()) } pub async fn unset_virtual_nic_host( &self, - interface_id: Uuid, - mapping: &DeleteVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { let mut v2p_mappings = self.v2p_mappings.lock().await; - let vec = v2p_mappings.entry(interface_id).or_default(); - vec.retain(|x| { - x.virtual_ip != mapping.virtual_ip || x.vni != mapping.vni - }); - - // If the last entry was removed, remove the entire interface ID so that - // tests don't have to distinguish never-created entries from - // previously-extant-but-now-empty entries. 
- if vec.is_empty() { - v2p_mappings.remove(&interface_id); - } - + v2p_mappings.remove(mapping); Ok(()) } + pub async fn list_virtual_nics( + &self, + ) -> Result, Error> { + let v2p_mappings = self.v2p_mappings.lock().await; + Ok(Vec::from_iter(v2p_mappings.clone())) + } + pub async fn instance_put_external_ip( &self, instance_id: Uuid, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 39a5647420..670d486686 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -37,9 +37,7 @@ use derive_more::From; use dropshot::HttpError; use futures::stream::FuturesUnordered; use futures::StreamExt; -use illumos_utils::opte::params::{ - DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost, -}; +use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use illumos_utils::opte::PortManager; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use illumos_utils::zone::ZONE_PREFIX; @@ -1051,9 +1049,15 @@ impl SledAgent { .map_err(Error::from) } + pub async fn list_virtual_nics( + &self, + ) -> Result, Error> { + self.inner.port_manager.list_virtual_nics().map_err(Error::from) + } + pub async fn set_virtual_nic_host( &self, - mapping: &SetVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { self.inner .port_manager @@ -1063,7 +1067,7 @@ impl SledAgent { pub async fn unset_virtual_nic_host( &self, - mapping: &DeleteVirtualNetworkInterfaceHost, + mapping: &VirtualNetworkInterfaceHost, ) -> Result<(), Error> { self.inner .port_manager diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 696411966b..0ed7a0562b 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -56,6 +56,7 @@ sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 service_firewall_propagation.period_secs = 300 +v2p_mapping_propagation.period_secs = 
30 instance_watcher.period_secs = 30 [default_region_allocation_strategy] diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 206f716fa7..c57d2d3ba2 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -56,6 +56,7 @@ sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 service_firewall_propagation.period_secs = 300 +v2p_mapping_propagation.period_secs = 30 instance_watcher.period_secs = 30 [default_region_allocation_strategy] diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 966e4de7fe..73095bd42d 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="025389ff39d594bf2b815377e2c1dc4dd23b1f96" -SHA2="a5d2f275c99152711dec1df58fd49d459d3fcb8fbfc7a7f48f432be248d74639" +COMMIT="23b0cf439f9f62b9a4933e55cc72bcaddc9596cd" +SHA2="fdb33ee7425923560534672264008ef8948d227afce948ab704de092ad72157c" diff --git a/tools/opte_version b/tools/opte_version index e1b3e11499..41d9666b04 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.28.233 +0.29.248