From 5d5aab50d593dcab01028d026bd135e33486fda5 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 16 May 2024 16:31:34 -0400 Subject: [PATCH] Blueprint builder: Guard against unknown external networking inputs (#5776) This started as a small cleanup to make `builder.rs` a little less noisy (the first commit, that just moves all the external networking "what IPs are in use and what IP should I pick next" logic into a separate module). As I was doing that, though, I noticed we only look at the parent blueprint when deciding what IPs are used, and we completely ignore `PlanningInput` (which tells us what IPs were in use in the database "recently"). I _think_ we should never have an IP or NIC in the database that isn't also in the parent blueprint: blueprints are the source of how IPs/NICs are created, and we shouldn't prune zones from the blueprint until we've confirmed that external networking records for those zones have been deleted. I made this a planning error: if we get an input that reports external IPs or NICs that the parent blueprint doesn't know about, we bail out and fail to plan. Assuming all of the above is correct (please correct me if any of it seems wrong!), it is still possible for this to fail, I think, if we had a sequence like this: 1. Nexus A generates a `PlanningInput` by reading from CRDB 2. Nexus B executes on a target blueprint that removes IPs/NICs from CRDB 3. Nexus B regenerates a new blueprint and prunes the zone(s) associated with the IPs/NICs from step 2 4. Nexus B makes this new blueprint the target 5. Nexus A attempts to generate a new blueprint with its `PlanningInput` from step 1 This isn't possible today for at least a couple reasons: we don't do prune zones as in step 3 at all, and the timing for steps 3-5 probably require automated blueprint regeneration. I think it could eventually be possible, but seems fine? Nexus A would fail to plan in step 5, but it would succeed the next time it tried (because it would generate a new `PlanningInput` that reflected the changes from step 2). --- .../planning/src/blueprint_builder/builder.rs | 300 ++++------------ .../blueprint_builder/external_networking.rs | 331 ++++++++++++++++++ .../planning/src/blueprint_builder/mod.rs | 1 + nexus/reconfigurator/planning/src/planner.rs | 39 +++ .../types/src/deployment/network_resources.rs | 24 +- nexus/types/src/deployment/planning_input.rs | 6 + nexus/types/src/deployment/tri_map.rs | 4 + 7 files changed, 475 insertions(+), 230 deletions(-) create mode 100644 nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 8c8f4f3e29..45aea75473 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -7,12 +7,9 @@ use crate::ip_allocator::IpAllocator; use crate::planner::ZoneExpungeReason; use anyhow::anyhow; -use anyhow::bail; -use debug_ignore::DebugIgnore; use internal_dns::config::Host; use internal_dns::config::Zone; use ipnet::IpAdd; -use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_inventory::now_db_precision; use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::Blueprint; @@ -35,13 +32,9 @@ use omicron_common::address::get_internal_dns_server_addresses; use omicron_common::address::get_sled_address; use omicron_common::address::get_switch_zone_address; use omicron_common::address::CP_SERVICES_RESERVED_ADDRESSES; -use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; -use omicron_common::address::NEXUS_OPTE_IPV6_SUBNET; use omicron_common::address::NTP_PORT; use omicron_common::address::SLED_RESERVED_ADDRESSES; use omicron_common::api::external::Generation; -use omicron_common::api::external::IpNet; -use omicron_common::api::external::MacAddr; use omicron_common::api::external::Vni; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; @@ -64,13 +57,14 @@ use std::collections::BTreeSet; use std::collections::HashSet; use std::hash::Hash; use std::net::IpAddr; -use std::net::Ipv4Addr; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use thiserror::Error; use typed_rng::TypedUuidRng; use typed_rng::UuidRng; +use super::external_networking::BuilderExternalNetworking; +use super::external_networking::ExternalNetworkingChoice; use super::zones::is_already_expunged; use super::zones::BuilderZoneState; use super::zones::BuilderZonesConfig; @@ -142,9 +136,10 @@ pub struct BlueprintBuilder<'a> { /// previous blueprint, on which this one will be based parent_blueprint: &'a Blueprint, - // These fields are used to allocate resources from sleds. + // These fields are used to allocate resources for sleds. input: &'a PlanningInput, sled_ip_allocators: BTreeMap, + external_networking: BuilderExternalNetworking<'a>, // These fields will become part of the final blueprint. See the // corresponding fields in `Blueprint`. @@ -155,16 +150,6 @@ pub struct BlueprintBuilder<'a> { creator: String, comments: Vec, - // These fields mirror how RSS chooses addresses for zone NICs. - nexus_v4_ips: AvailableIterator<'static, Ipv4Addr>, - nexus_v6_ips: AvailableIterator<'static, Ipv6Addr>, - - // Iterator of available external IPs for service zones - available_external_ips: AvailableIterator<'a, IpAddr>, - - // Iterator of available MAC addresses in the system address range - available_system_macs: AvailableIterator<'a, MacAddr>, - // Random number generator for new UUIDs rng: BlueprintBuilderRng, } @@ -242,111 +227,8 @@ impl<'a> BlueprintBuilder<'a> { "parent_id" => parent_blueprint.id.to_string(), )); - // Scan through the parent blueprint and build several sets of "used - // resources". When adding new control plane zones to a sled, we may - // need to allocate new resources to that zone. However, allocation at - // this point is entirely optimistic and theoretical: our caller may - // discard the blueprint we create without ever making it the new - // target, or it might be an arbitrarily long time before it becomes - // the target. We need to be able to make allocation decisions that we - // expect the blueprint executor to be able to realize successfully if - // and when we become the target, but we cannot _actually_ perform - // resource allocation. - // - // To do this, we look at our parent blueprint's used resources, and - // then choose new resources that aren't already in use (if possible; - // if we need to allocate a new resource and the parent blueprint - // appears to be using all the resources of that kind, our blueprint - // generation will fail). - // - // For example, RSS assigns Nexus NIC IPs by stepping through a list of - // addresses based on `NEXUS_OPTE_IPVx_SUBNET` (as in the iterators - // below). We use the same list of addresses, but additionally need to - // filter out the existing IPs for any Nexus instances that already - // exist. - // - // Note that by building these iterators up front based on - // `parent_blueprint`, we cannot reuse resources in a case where we - // remove a zone that used a resource and then add another zone that - // wants the same kind of resource. That is mostly okay, but there are - // some cases in which we may have to do that -- particularly external - // DNS zones, which tend to have a small number of fixed IPs. Solving - // that is a TODO. - // - // Also note that currently, we don't perform any kind of garbage - // collection on sleds and zones that no longer have any attached - // resources. Once a sled or zone is marked expunged, it will always - // stay in that state. - // https://github.com/oxidecomputer/omicron/issues/5552 tracks - // implementing this kind of garbage collection, and we should do it - // very soon. - - let mut existing_nexus_v4_ips: HashSet = HashSet::new(); - let mut existing_nexus_v6_ips: HashSet = HashSet::new(); - let mut used_external_ips: HashSet = HashSet::new(); - let mut used_macs: HashSet = HashSet::new(); - - for (_, z) in - parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) - { - let zone_type = &z.zone_type; - if let BlueprintZoneType::Nexus(nexus) = zone_type { - match nexus.nic.ip { - IpAddr::V4(ip) => { - if !existing_nexus_v4_ips.insert(ip) { - bail!("duplicate Nexus NIC IP: {ip}"); - } - } - IpAddr::V6(ip) => { - if !existing_nexus_v6_ips.insert(ip) { - bail!("duplicate Nexus NIC IP: {ip}"); - } - } - } - } - - if let Some((external_ip, nic)) = zone_type.external_networking() { - // For the test suite, ignore localhost. It gets reused many - // times and that's okay. We don't expect to see localhost - // outside the test suite. - if !external_ip.ip().is_loopback() - && !used_external_ips.insert(external_ip.ip()) - { - bail!("duplicate external IP: {external_ip:?}"); - } - - if !used_macs.insert(nic.mac) { - bail!("duplicate service vNIC MAC: {}", nic.mac); - } - } - } - - // TODO-performance Building these iterators as "walk through the list - // and skip anything we've used already" is fine as long as we're - // talking about a small number of resources (e.g., single-digit number - // of Nexus instances), but wouldn't be ideal if we have many resources - // we need to skip. We could do something smarter here based on the sets - // of used resources we built above if needed. - let nexus_v4_ips = AvailableIterator::new( - NEXUS_OPTE_IPV4_SUBNET - .0 - .iter() - .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), - existing_nexus_v4_ips, - ); - let nexus_v6_ips = AvailableIterator::new( - NEXUS_OPTE_IPV6_SUBNET - .0 - .iter() - .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), - existing_nexus_v6_ips, - ); - let available_external_ips = AvailableIterator::new( - input.service_ip_pool_ranges().iter().flat_map(|r| r.iter()), - used_external_ips, - ); - let available_system_macs = - AvailableIterator::new(MacAddr::iter_system(), used_macs); + let external_networking = + BuilderExternalNetworking::new(parent_blueprint, input)?; // Prefer the sled state from our parent blueprint for sleds // that were in it; there may be new sleds in `input`, in which @@ -378,15 +260,12 @@ impl<'a> BlueprintBuilder<'a> { parent_blueprint, input, sled_ip_allocators: BTreeMap::new(), + external_networking, zones: BlueprintZonesBuilder::new(parent_blueprint), disks: BlueprintDisksBuilder::new(parent_blueprint), sled_state, creator: creator.to_owned(), comments: Vec::new(), - nexus_v4_ips, - nexus_v6_ips, - available_external_ips, - available_system_macs, rng: BlueprintBuilderRng::new(), }) } @@ -806,44 +685,27 @@ impl<'a> BlueprintBuilder<'a> { for _ in 0..num_nexus_to_add { let nexus_id = self.rng.zone_rng.next(); + let ExternalNetworkingChoice { + external_ip, + nic_ip, + nic_subnet, + nic_mac, + } = self.external_networking.for_new_nexus()?; let external_ip = OmicronZoneExternalFloatingIp { id: self.rng.external_ip_rng.next(), - ip: self - .available_external_ips - .next() - .ok_or(Error::NoExternalServiceIpAvailable)?, + ip: external_ip, }; let nic = { - let (ip, subnet) = match external_ip.ip { - IpAddr::V4(_) => ( - self.nexus_v4_ips - .next() - .ok_or(Error::ExhaustedNexusIps)? - .into(), - IpNet::from(*NEXUS_OPTE_IPV4_SUBNET), - ), - IpAddr::V6(_) => ( - self.nexus_v6_ips - .next() - .ok_or(Error::ExhaustedNexusIps)? - .into(), - IpNet::from(*NEXUS_OPTE_IPV6_SUBNET), - ), - }; - let mac = self - .available_system_macs - .next() - .ok_or(Error::NoSystemMacAddressAvailable)?; NetworkInterface { id: self.rng.network_interface_rng.next(), kind: NetworkInterfaceKind::Service { id: nexus_id.into_untyped_uuid(), }, name: format!("nexus-{nexus_id}").parse().unwrap(), - ip, - mac, - subnet, + ip: nic_ip, + mac: nic_mac, + subnet: nic_subnet, vni: Vni::SERVICES_VNI, primary: true, slot: 0, @@ -942,45 +804,6 @@ impl<'a> BlueprintBuilder<'a> { } } -/// Combines a base iterator with an `in_use` set, filtering out any elements -/// that are in the "in_use" set. -/// -/// This can be done with a chained `.filter` on the iterator, but -/// `AvailableIterator` also allows for inspection of the `in_use` set. -/// -/// Note that this is a stateful iterator -- i.e. it implements `Iterator`, not -/// `IntoIterator`. That's what we currently need in the planner. -#[derive(Debug)] -pub struct AvailableIterator<'a, T> { - base: DebugIgnore + Send + 'a>>, - in_use: HashSet, -} - -impl<'a, T: Hash + Eq> AvailableIterator<'a, T> { - /// Creates a new `AvailableIterator` from a base iterator and a set of - /// elements that are in use. - pub fn new(base: I, in_use: impl IntoIterator) -> Self - where - I: Iterator + Send + 'a, - { - let in_use = in_use.into_iter().collect(); - AvailableIterator { base: DebugIgnore(Box::new(base)), in_use } - } - - /// Returns the in-use set. - pub fn in_use(&self) -> &HashSet { - &self.in_use - } -} - -impl Iterator for AvailableIterator<'_, T> { - type Item = T; - - fn next(&mut self) -> Option { - self.base.find(|item| !self.in_use.contains(item)) - } -} - #[derive(Debug)] struct BlueprintBuilderRng { // Have separate RNGs for the different kinds of UUIDs we might add, @@ -1230,11 +1053,12 @@ pub mod test { use expectorate::assert_contents; use nexus_types::deployment::BlueprintOrCollectionZoneConfig; use nexus_types::deployment::BlueprintZoneFilter; + use nexus_types::deployment::OmicronZoneNetworkResources; use nexus_types::external_api::views::SledPolicy; use omicron_common::address::IpRange; use omicron_test_utils::dev::test_setup_log; use std::collections::BTreeSet; - use test_strategy::proptest; + use std::mem; pub const DEFAULT_N_SLEDS: usize = 3; @@ -1535,6 +1359,14 @@ pub mod test { static TEST_NAME: &str = "blueprint_builder_test_add_physical_disks"; let logctx = test_setup_log(TEST_NAME); let (_, input, _) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + let input = { + // Clear out the external networking records from `input`, since + // we're building an empty blueprint. + let mut builder = input.into_builder(); + *builder.network_resources_mut() = + OmicronZoneNetworkResources::new(); + builder.build() + }; // Start with an empty blueprint (sleds with no zones). let parent = BlueprintBuilder::build_empty_with_sleds_seeded( @@ -1583,6 +1415,14 @@ pub mod test { // Discard the example blueprint and start with an empty one. let (collection, input, _) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + let input = { + // Clear out the external networking records from `input`, since + // we're building an empty blueprint. + let mut builder = input.into_builder(); + *builder.network_resources_mut() = + OmicronZoneNetworkResources::new(); + builder.build() + }; let parent = BlueprintBuilder::build_empty_with_sleds_seeded( input.all_sled_ids(SledFilter::Commissioned), "test", @@ -1624,7 +1464,7 @@ pub mod test { fn test_add_nexus_error_cases() { static TEST_NAME: &str = "blueprint_builder_test_add_nexus_error_cases"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, input, mut parent) = + let (mut collection, mut input, mut parent) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // Remove the Nexus zone from one of the sleds so that @@ -1638,12 +1478,51 @@ pub mod test { if zones.zones.zones.len() < nzones_before_retain { selected_sled_id = Some(*sled_id); // Also remove this zone from the blueprint. + let mut removed_nexus = None; parent .blueprint_zones .get_mut(sled_id) .expect("missing sled") .zones - .retain(|z| !z.zone_type.is_nexus()); + .retain(|z| match &z.zone_type { + BlueprintZoneType::Nexus(z) => { + removed_nexus = Some(z.clone()); + false + } + _ => true, + }); + let removed_nexus = + removed_nexus.expect("removed Nexus from blueprint"); + + // Also remove this Nexus's external networking resources + // from `input`. + let mut builder = input.into_builder(); + let mut new_network_resources = + OmicronZoneNetworkResources::new(); + let old_network_resources = builder.network_resources_mut(); + for ip in old_network_resources.omicron_zone_external_ips() + { + if ip.ip.id() != removed_nexus.external_ip.id { + new_network_resources + .add_external_ip(ip.zone_id, ip.ip) + .expect("copied IP to new input"); + } + } + for nic in old_network_resources.omicron_zone_nics() { + if nic.nic.id.into_untyped_uuid() + != removed_nexus.nic.id + { + new_network_resources + .add_nic(nic.zone_id, nic.nic) + .expect("copied NIC to new input"); + } + } + mem::swap( + old_network_resources, + &mut new_network_resources, + ); + input = builder.build(); + break; } } @@ -1887,31 +1766,4 @@ pub mod test { logctx.cleanup_successful(); } - - /// Test that `AvailableIterator` correctly filters out items that are in - /// use. - #[proptest] - fn test_available_iterator(items: HashSet<(i32, bool)>) { - let mut in_use_map = HashSet::new(); - let mut expected_available = Vec::new(); - let items: Vec<_> = items - .into_iter() - .map(|(item, in_use)| { - if in_use { - in_use_map.insert(item); - } else { - expected_available.push(item); - } - item - }) - .collect(); - - let available = AvailableIterator::new(items.into_iter(), in_use_map); - let actual_available = available.collect::>(); - - assert_eq!( - expected_available, actual_available, - "available items match" - ); - } } diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs b/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs new file mode 100644 index 0000000000..b9100f518d --- /dev/null +++ b/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs @@ -0,0 +1,331 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::Error; +use anyhow::bail; +use debug_ignore::DebugIgnore; +use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::BlueprintZoneType; +use nexus_types::deployment::PlanningInput; +use omicron_common::address::DNS_OPTE_IPV4_SUBNET; +use omicron_common::address::DNS_OPTE_IPV6_SUBNET; +use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; +use omicron_common::address::NEXUS_OPTE_IPV6_SUBNET; +use omicron_common::address::NTP_OPTE_IPV4_SUBNET; +use omicron_common::address::NTP_OPTE_IPV6_SUBNET; +use omicron_common::api::external::IpNet; +use omicron_common::api::external::MacAddr; +use std::collections::HashSet; +use std::hash::Hash; +use std::net::IpAddr; +use std::net::Ipv4Addr; +use std::net::Ipv6Addr; + +#[derive(Debug)] +pub(super) struct BuilderExternalNetworking<'a> { + // These fields mirror how RSS chooses addresses for zone NICs. + nexus_v4_ips: AvailableIterator<'static, Ipv4Addr>, + nexus_v6_ips: AvailableIterator<'static, Ipv6Addr>, + + // Iterator of available external IPs for service zones + available_external_ips: AvailableIterator<'a, IpAddr>, + + // Iterator of available MAC addresses in the system address range + available_system_macs: AvailableIterator<'a, MacAddr>, +} + +impl<'a> BuilderExternalNetworking<'a> { + pub(super) fn new( + parent_blueprint: &'a Blueprint, + input: &'a PlanningInput, + ) -> anyhow::Result { + // Scan through the parent blueprint and build several sets of "used + // resources". When adding new control plane zones to a sled, we may + // need to allocate new resources to that zone. However, allocation at + // this point is entirely optimistic and theoretical: our caller may + // discard the blueprint we create without ever making it the new + // target, or it might be an arbitrarily long time before it becomes + // the target. We need to be able to make allocation decisions that we + // expect the blueprint executor to be able to realize successfully if + // and when we become the target, but we cannot _actually_ perform + // resource allocation. + // + // To do this, we look at our parent blueprint's used resources, and + // then choose new resources that aren't already in use (if possible; + // if we need to allocate a new resource and the parent blueprint + // appears to be using all the resources of that kind, our blueprint + // generation will fail). + // + // For example, RSS assigns Nexus NIC IPs by stepping through a list of + // addresses based on `NEXUS_OPTE_IPVx_SUBNET` (as in the iterators + // below). We use the same list of addresses, but additionally need to + // filter out the existing IPs for any Nexus instances that already + // exist. + // + // Note that by building these iterators up front based on + // `parent_blueprint`, we cannot reuse resources in a case where we + // remove a zone that used a resource and then add another zone that + // wants the same kind of resource. That is mostly okay, but there are + // some cases in which we may have to do that -- particularly external + // DNS zones, which tend to have a small number of fixed IPs. Solving + // that is a TODO. + // + // Also note that currently, we don't perform any kind of garbage + // collection on sleds and zones that no longer have any attached + // resources. Once a sled or zone is marked expunged, it will always + // stay in that state. + // https://github.com/oxidecomputer/omicron/issues/5552 tracks + // implementing this kind of garbage collection, and we should do it + // very soon. + + let mut existing_nexus_v4_ips: HashSet = HashSet::new(); + let mut existing_nexus_v6_ips: HashSet = HashSet::new(); + let mut used_external_ips: HashSet = HashSet::new(); + let mut used_macs: HashSet = HashSet::new(); + + for (_, z) in + parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) + { + let zone_type = &z.zone_type; + if let BlueprintZoneType::Nexus(nexus) = zone_type { + match nexus.nic.ip { + IpAddr::V4(ip) => { + if !existing_nexus_v4_ips.insert(ip) { + bail!("duplicate Nexus NIC IP: {ip}"); + } + } + IpAddr::V6(ip) => { + if !existing_nexus_v6_ips.insert(ip) { + bail!("duplicate Nexus NIC IP: {ip}"); + } + } + } + } + + if let Some((external_ip, nic)) = zone_type.external_networking() { + // For the test suite, ignore localhost. It gets reused many + // times and that's okay. We don't expect to see localhost + // outside the test suite. + if !external_ip.ip().is_loopback() + && !used_external_ips.insert(external_ip.ip()) + { + bail!("duplicate external IP: {external_ip:?}"); + } + + if !used_macs.insert(nic.mac) { + bail!("duplicate service vNIC MAC: {}", nic.mac); + } + } + } + + // Check the planning input: there shouldn't be any external networking + // resources in the database (the source of `input`) that we don't know + // about from the parent blueprint. + for external_ip_entry in + input.network_resources().omicron_zone_external_ips() + { + // As above, ignore localhost (used by the test suite). + if external_ip_entry.ip.ip().is_loopback() { + continue; + } + if !used_external_ips.contains(&external_ip_entry.ip.ip()) { + bail!( + "planning input contains unexpected external IP \ + (IP not found in parent blueprint): {external_ip_entry:?}" + ); + } + } + for nic_entry in input.network_resources().omicron_zone_nics() { + if !used_macs.contains(&nic_entry.nic.mac) { + bail!( + "planning input contains unexpected NIC \ + (MAC not found in parent blueprint): {nic_entry:?}" + ); + } + match nic_entry.nic.ip { + IpAddr::V4(ip) if NEXUS_OPTE_IPV4_SUBNET.contains(ip) => { + if !existing_nexus_v4_ips.contains(&ip) { + bail!( + "planning input contains unexpected NIC \ + (IP not found in parent blueprint): {nic_entry:?}" + ); + } + } + IpAddr::V4(ip) if NTP_OPTE_IPV4_SUBNET.contains(ip) => { + // TODO check existing_ntp_v4_ips, once it exists + } + IpAddr::V4(ip) if DNS_OPTE_IPV4_SUBNET.contains(ip) => { + // TODO check existing_dns_v4_ips, once it exists + } + IpAddr::V6(ip) if NEXUS_OPTE_IPV6_SUBNET.contains(ip) => { + if !existing_nexus_v6_ips.contains(&ip) { + bail!( + "planning input contains unexpected NIC \ + (IP not found in parent blueprint): {nic_entry:?}" + ); + } + } + IpAddr::V6(ip) if NTP_OPTE_IPV6_SUBNET.contains(ip) => { + // TODO check existing_ntp_v6_ips, once it exists + } + IpAddr::V6(ip) if DNS_OPTE_IPV6_SUBNET.contains(ip) => { + // TODO check existing_dns_v6_ips, once it exists + } + _ => { + bail!( + "planning input contains unexpected NIC \ + (IP not contained in known OPTE subnet): {nic_entry:?}" + ) + } + } + } + + // TODO-performance Building these iterators as "walk through the list + // and skip anything we've used already" is fine as long as we're + // talking about a small number of resources (e.g., single-digit number + // of Nexus instances), but wouldn't be ideal if we have many resources + // we need to skip. We could do something smarter here based on the sets + // of used resources we built above if needed. + let nexus_v4_ips = AvailableIterator::new( + NEXUS_OPTE_IPV4_SUBNET + .0 + .iter() + .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), + existing_nexus_v4_ips, + ); + let nexus_v6_ips = AvailableIterator::new( + NEXUS_OPTE_IPV6_SUBNET + .0 + .iter() + .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), + existing_nexus_v6_ips, + ); + let available_external_ips = AvailableIterator::new( + input.service_ip_pool_ranges().iter().flat_map(|r| r.iter()), + used_external_ips, + ); + let available_system_macs = + AvailableIterator::new(MacAddr::iter_system(), used_macs); + + Ok(Self { + nexus_v4_ips, + nexus_v6_ips, + available_external_ips, + available_system_macs, + }) + } + + pub(super) fn for_new_nexus( + &mut self, + ) -> Result { + let external_ip = self + .available_external_ips + .next() + .ok_or(Error::NoExternalServiceIpAvailable)?; + let (nic_ip, nic_subnet) = match external_ip { + IpAddr::V4(_) => ( + self.nexus_v4_ips + .next() + .ok_or(Error::ExhaustedNexusIps)? + .into(), + IpNet::from(*NEXUS_OPTE_IPV4_SUBNET), + ), + IpAddr::V6(_) => ( + self.nexus_v6_ips + .next() + .ok_or(Error::ExhaustedNexusIps)? + .into(), + IpNet::from(*NEXUS_OPTE_IPV6_SUBNET), + ), + }; + let nic_mac = self + .available_system_macs + .next() + .ok_or(Error::NoSystemMacAddressAvailable)?; + + Ok(ExternalNetworkingChoice { + external_ip, + nic_ip, + nic_subnet, + nic_mac, + }) + } +} + +#[derive(Debug, Clone, Copy)] +pub(super) struct ExternalNetworkingChoice { + pub(super) external_ip: IpAddr, + pub(super) nic_ip: IpAddr, + pub(super) nic_subnet: IpNet, + pub(super) nic_mac: MacAddr, +} + +/// Combines a base iterator with an `in_use` set, filtering out any elements +/// that are in the "in_use" set. +/// +/// This can be done with a chained `.filter` on the iterator, but +/// `AvailableIterator` also allows for inspection of the `in_use` set. +/// +/// Note that this is a stateful iterator -- i.e. it implements `Iterator`, not +/// `IntoIterator`. That's what we currently need in the planner. +#[derive(Debug)] +struct AvailableIterator<'a, T> { + base: DebugIgnore + Send + 'a>>, + in_use: HashSet, +} + +impl<'a, T: Hash + Eq> AvailableIterator<'a, T> { + /// Creates a new `AvailableIterator` from a base iterator and a set of + /// elements that are in use. + fn new(base: I, in_use: impl IntoIterator) -> Self + where + I: Iterator + Send + 'a, + { + let in_use = in_use.into_iter().collect(); + AvailableIterator { base: DebugIgnore(Box::new(base)), in_use } + } +} + +impl Iterator for AvailableIterator<'_, T> { + type Item = T; + + fn next(&mut self) -> Option { + self.base.find(|item| !self.in_use.contains(item)) + } +} + +#[cfg(test)] +pub mod test { + use super::*; + use test_strategy::proptest; + + /// Test that `AvailableIterator` correctly filters out items that are in + /// use. + #[proptest] + fn test_available_iterator(items: HashSet<(i32, bool)>) { + let mut in_use_map = HashSet::new(); + let mut expected_available = Vec::new(); + let items: Vec<_> = items + .into_iter() + .map(|(item, in_use)| { + if in_use { + in_use_map.insert(item); + } else { + expected_available.push(item); + } + item + }) + .collect(); + + let available = AvailableIterator::new(items.into_iter(), in_use_map); + let actual_available = available.collect::>(); + + assert_eq!( + expected_available, actual_available, + "available items match" + ); + } +} diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs index e3afa2cdad..99d3b41772 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs @@ -5,6 +5,7 @@ //! Low-level facility for generating Blueprints mod builder; +mod external_networking; mod zones; pub use builder::*; diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 0c7ee8f5cb..aca5f057d8 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -504,6 +504,7 @@ mod test { use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; + use nexus_types::deployment::OmicronZoneNetworkResources; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; @@ -511,10 +512,12 @@ mod test { use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use sled_agent_client::ZoneKind; + use std::mem; use typed_rng::TypedUuidRng; /// Runs through a basic sequence of blueprints for adding a sled @@ -734,6 +737,42 @@ mod test { blueprint.blueprint_zones.retain(|k, _v| keep_sled_id == *k); blueprint.blueprint_disks.retain(|k, _v| keep_sled_id == *k); + // Also remove all the networking resources for the zones we just + // stripped out; i.e., only keep those for `keep_sled_id`. + let mut new_network_resources = OmicronZoneNetworkResources::new(); + let old_network_resources = builder.network_resources_mut(); + for old_ip in old_network_resources.omicron_zone_external_ips() { + if blueprint.all_omicron_zones(BlueprintZoneFilter::All).any( + |(_, zone)| { + zone.zone_type + .external_networking() + .map(|(ip, _nic)| ip.id() == old_ip.ip.id()) + .unwrap_or(false) + }, + ) { + new_network_resources + .add_external_ip(old_ip.zone_id, old_ip.ip) + .expect("copied IP to new input"); + } + } + for old_nic in old_network_resources.omicron_zone_nics() { + if blueprint.all_omicron_zones(BlueprintZoneFilter::All).any( + |(_, zone)| { + zone.zone_type + .external_networking() + .map(|(_ip, nic)| { + nic.id == old_nic.nic.id.into_untyped_uuid() + }) + .unwrap_or(false) + }, + ) { + new_network_resources + .add_nic(old_nic.zone_id, old_nic.nic) + .expect("copied NIC to new input"); + } + } + mem::swap(old_network_resources, &mut &mut new_network_resources); + (keep_sled_id, blueprint, collection, builder.build()) }; diff --git a/nexus/types/src/deployment/network_resources.rs b/nexus/types/src/deployment/network_resources.rs index 15f495d87a..c93e604af9 100644 --- a/nexus/types/src/deployment/network_resources.rs +++ b/nexus/types/src/deployment/network_resources.rs @@ -59,6 +59,18 @@ impl OmicronZoneNetworkResources { } } + pub fn omicron_zone_external_ips( + &self, + ) -> impl Iterator + '_ { + self.omicron_zone_external_ips.iter().copied() + } + + pub fn omicron_zone_nics( + &self, + ) -> impl Iterator + '_ { + self.omicron_zone_nics.iter().copied() + } + pub fn add_external_ip( &mut self, zone_id: OmicronZoneUuid, @@ -79,7 +91,7 @@ impl OmicronZoneNetworkResources { zone_id: OmicronZoneUuid, nic: OmicronZoneNic, ) -> Result<(), AddNetworkResourceError> { - let entry = OmicronZoneNicEntry { zone_id, nic: nic.clone() }; + let entry = OmicronZoneNicEntry { zone_id, nic }; self.omicron_zone_nics.insert_no_dups(entry).map_err(|err| { AddNetworkResourceError::DuplicateOmicronZoneNic { zone_id, @@ -221,7 +233,7 @@ pub struct OmicronZoneExternalSnatIp { /// /// This is a slimmer `nexus_db_model::ServiceNetworkInterface` that only stores /// the fields necessary for blueprint planning. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct OmicronZoneNic { pub id: VnicUuid, pub mac: MacAddr, @@ -233,7 +245,7 @@ pub struct OmicronZoneNic { /// A pair of an Omicron zone ID and an external IP. /// /// Part of [`OmicronZoneNetworkResources`]. -#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Clone, Copy, Debug, Deserialize, Serialize)] pub struct OmicronZoneExternalIpEntry { pub zone_id: OmicronZoneUuid, pub ip: OmicronZoneExternalIp, @@ -264,10 +276,10 @@ impl TriMapEntry for OmicronZoneExternalIpEntry { /// A pair of an Omicron zone ID and a network interface. /// /// Part of [`OmicronZoneNetworkResources`]. -#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Clone, Copy, Debug, Deserialize, Serialize)] pub struct OmicronZoneNicEntry { - zone_id: OmicronZoneUuid, - nic: OmicronZoneNic, + pub zone_id: OmicronZoneUuid, + pub nic: OmicronZoneNic, } impl TriMapEntry for OmicronZoneNicEntry { diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index ccb15b858a..c8cdeec15b 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -548,6 +548,12 @@ impl PlanningInputBuilder { Ok(self.network_resources.add_nic(zone_id, nic)?) } + pub fn network_resources_mut( + &mut self, + ) -> &mut OmicronZoneNetworkResources { + &mut self.network_resources + } + pub fn policy_mut(&mut self) -> &mut Policy { &mut self.policy } diff --git a/nexus/types/src/deployment/tri_map.rs b/nexus/types/src/deployment/tri_map.rs index 52b64aec43..e4ef320b4f 100644 --- a/nexus/types/src/deployment/tri_map.rs +++ b/nexus/types/src/deployment/tri_map.rs @@ -92,6 +92,10 @@ impl TriMap { } } + pub(crate) fn iter(&self) -> impl Iterator { + self.entries.iter() + } + /// Checks general invariants of the map. /// /// The code below always upholds these invariants, but it's useful to have