forked from oxidecomputer/omicron
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Blueprint builder: Guard against unknown external networking inputs (o…
…xidecomputer#5776) This started as a small cleanup to make `builder.rs` a little less noisy (the first commit, that just moves all the external networking "what IPs are in use and what IP should I pick next" logic into a separate module). As I was doing that, though, I noticed we only look at the parent blueprint when deciding what IPs are used, and we completely ignore `PlanningInput` (which tells us what IPs were in use in the database "recently"). I _think_ we should never have an IP or NIC in the database that isn't also in the parent blueprint: blueprints are the source of how IPs/NICs are created, and we shouldn't prune zones from the blueprint until we've confirmed that external networking records for those zones have been deleted. I made this a planning error: if we get an input that reports external IPs or NICs that the parent blueprint doesn't know about, we bail out and fail to plan. Assuming all of the above is correct (please correct me if any of it seems wrong!), it is still possible for this to fail, I think, if we had a sequence like this: 1. Nexus A generates a `PlanningInput` by reading from CRDB 2. Nexus B executes on a target blueprint that removes IPs/NICs from CRDB 3. Nexus B regenerates a new blueprint and prunes the zone(s) associated with the IPs/NICs from step 2 4. Nexus B makes this new blueprint the target 5. Nexus A attempts to generate a new blueprint with its `PlanningInput` from step 1 This isn't possible today for at least a couple reasons: we don't do prune zones as in step 3 at all, and the timing for steps 3-5 probably require automated blueprint regeneration. I think it could eventually be possible, but seems fine? Nexus A would fail to plan in step 5, but it would succeed the next time it tried (because it would generate a new `PlanningInput` that reflected the changes from step 2).
- Loading branch information
1 parent
12391a9
commit 5d5aab5
Showing
7 changed files
with
475 additions
and
230 deletions.
There are no files selected for viewing
300 changes: 76 additions & 224 deletions
300
nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
Large diffs are not rendered by default.
Oops, something went wrong.
331 changes: 331 additions & 0 deletions
331
nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,331 @@ | ||
// This Source Code Form is subject to the terms of the Mozilla Public | ||
// License, v. 2.0. If a copy of the MPL was not distributed with this | ||
// file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
|
||
use super::Error; | ||
use anyhow::bail; | ||
use debug_ignore::DebugIgnore; | ||
use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; | ||
use nexus_types::deployment::Blueprint; | ||
use nexus_types::deployment::BlueprintZoneFilter; | ||
use nexus_types::deployment::BlueprintZoneType; | ||
use nexus_types::deployment::PlanningInput; | ||
use omicron_common::address::DNS_OPTE_IPV4_SUBNET; | ||
use omicron_common::address::DNS_OPTE_IPV6_SUBNET; | ||
use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; | ||
use omicron_common::address::NEXUS_OPTE_IPV6_SUBNET; | ||
use omicron_common::address::NTP_OPTE_IPV4_SUBNET; | ||
use omicron_common::address::NTP_OPTE_IPV6_SUBNET; | ||
use omicron_common::api::external::IpNet; | ||
use omicron_common::api::external::MacAddr; | ||
use std::collections::HashSet; | ||
use std::hash::Hash; | ||
use std::net::IpAddr; | ||
use std::net::Ipv4Addr; | ||
use std::net::Ipv6Addr; | ||
|
||
#[derive(Debug)] | ||
pub(super) struct BuilderExternalNetworking<'a> { | ||
// These fields mirror how RSS chooses addresses for zone NICs. | ||
nexus_v4_ips: AvailableIterator<'static, Ipv4Addr>, | ||
nexus_v6_ips: AvailableIterator<'static, Ipv6Addr>, | ||
|
||
// Iterator of available external IPs for service zones | ||
available_external_ips: AvailableIterator<'a, IpAddr>, | ||
|
||
// Iterator of available MAC addresses in the system address range | ||
available_system_macs: AvailableIterator<'a, MacAddr>, | ||
} | ||
|
||
impl<'a> BuilderExternalNetworking<'a> { | ||
pub(super) fn new( | ||
parent_blueprint: &'a Blueprint, | ||
input: &'a PlanningInput, | ||
) -> anyhow::Result<Self> { | ||
// Scan through the parent blueprint and build several sets of "used | ||
// resources". When adding new control plane zones to a sled, we may | ||
// need to allocate new resources to that zone. However, allocation at | ||
// this point is entirely optimistic and theoretical: our caller may | ||
// discard the blueprint we create without ever making it the new | ||
// target, or it might be an arbitrarily long time before it becomes | ||
// the target. We need to be able to make allocation decisions that we | ||
// expect the blueprint executor to be able to realize successfully if | ||
// and when we become the target, but we cannot _actually_ perform | ||
// resource allocation. | ||
// | ||
// To do this, we look at our parent blueprint's used resources, and | ||
// then choose new resources that aren't already in use (if possible; | ||
// if we need to allocate a new resource and the parent blueprint | ||
// appears to be using all the resources of that kind, our blueprint | ||
// generation will fail). | ||
// | ||
// For example, RSS assigns Nexus NIC IPs by stepping through a list of | ||
// addresses based on `NEXUS_OPTE_IPVx_SUBNET` (as in the iterators | ||
// below). We use the same list of addresses, but additionally need to | ||
// filter out the existing IPs for any Nexus instances that already | ||
// exist. | ||
// | ||
// Note that by building these iterators up front based on | ||
// `parent_blueprint`, we cannot reuse resources in a case where we | ||
// remove a zone that used a resource and then add another zone that | ||
// wants the same kind of resource. That is mostly okay, but there are | ||
// some cases in which we may have to do that -- particularly external | ||
// DNS zones, which tend to have a small number of fixed IPs. Solving | ||
// that is a TODO. | ||
// | ||
// Also note that currently, we don't perform any kind of garbage | ||
// collection on sleds and zones that no longer have any attached | ||
// resources. Once a sled or zone is marked expunged, it will always | ||
// stay in that state. | ||
// https://github.com/oxidecomputer/omicron/issues/5552 tracks | ||
// implementing this kind of garbage collection, and we should do it | ||
// very soon. | ||
|
||
let mut existing_nexus_v4_ips: HashSet<Ipv4Addr> = HashSet::new(); | ||
let mut existing_nexus_v6_ips: HashSet<Ipv6Addr> = HashSet::new(); | ||
let mut used_external_ips: HashSet<IpAddr> = HashSet::new(); | ||
let mut used_macs: HashSet<MacAddr> = HashSet::new(); | ||
|
||
for (_, z) in | ||
parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) | ||
{ | ||
let zone_type = &z.zone_type; | ||
if let BlueprintZoneType::Nexus(nexus) = zone_type { | ||
match nexus.nic.ip { | ||
IpAddr::V4(ip) => { | ||
if !existing_nexus_v4_ips.insert(ip) { | ||
bail!("duplicate Nexus NIC IP: {ip}"); | ||
} | ||
} | ||
IpAddr::V6(ip) => { | ||
if !existing_nexus_v6_ips.insert(ip) { | ||
bail!("duplicate Nexus NIC IP: {ip}"); | ||
} | ||
} | ||
} | ||
} | ||
|
||
if let Some((external_ip, nic)) = zone_type.external_networking() { | ||
// For the test suite, ignore localhost. It gets reused many | ||
// times and that's okay. We don't expect to see localhost | ||
// outside the test suite. | ||
if !external_ip.ip().is_loopback() | ||
&& !used_external_ips.insert(external_ip.ip()) | ||
{ | ||
bail!("duplicate external IP: {external_ip:?}"); | ||
} | ||
|
||
if !used_macs.insert(nic.mac) { | ||
bail!("duplicate service vNIC MAC: {}", nic.mac); | ||
} | ||
} | ||
} | ||
|
||
// Check the planning input: there shouldn't be any external networking | ||
// resources in the database (the source of `input`) that we don't know | ||
// about from the parent blueprint. | ||
for external_ip_entry in | ||
input.network_resources().omicron_zone_external_ips() | ||
{ | ||
// As above, ignore localhost (used by the test suite). | ||
if external_ip_entry.ip.ip().is_loopback() { | ||
continue; | ||
} | ||
if !used_external_ips.contains(&external_ip_entry.ip.ip()) { | ||
bail!( | ||
"planning input contains unexpected external IP \ | ||
(IP not found in parent blueprint): {external_ip_entry:?}" | ||
); | ||
} | ||
} | ||
for nic_entry in input.network_resources().omicron_zone_nics() { | ||
if !used_macs.contains(&nic_entry.nic.mac) { | ||
bail!( | ||
"planning input contains unexpected NIC \ | ||
(MAC not found in parent blueprint): {nic_entry:?}" | ||
); | ||
} | ||
match nic_entry.nic.ip { | ||
IpAddr::V4(ip) if NEXUS_OPTE_IPV4_SUBNET.contains(ip) => { | ||
if !existing_nexus_v4_ips.contains(&ip) { | ||
bail!( | ||
"planning input contains unexpected NIC \ | ||
(IP not found in parent blueprint): {nic_entry:?}" | ||
); | ||
} | ||
} | ||
IpAddr::V4(ip) if NTP_OPTE_IPV4_SUBNET.contains(ip) => { | ||
// TODO check existing_ntp_v4_ips, once it exists | ||
} | ||
IpAddr::V4(ip) if DNS_OPTE_IPV4_SUBNET.contains(ip) => { | ||
// TODO check existing_dns_v4_ips, once it exists | ||
} | ||
IpAddr::V6(ip) if NEXUS_OPTE_IPV6_SUBNET.contains(ip) => { | ||
if !existing_nexus_v6_ips.contains(&ip) { | ||
bail!( | ||
"planning input contains unexpected NIC \ | ||
(IP not found in parent blueprint): {nic_entry:?}" | ||
); | ||
} | ||
} | ||
IpAddr::V6(ip) if NTP_OPTE_IPV6_SUBNET.contains(ip) => { | ||
// TODO check existing_ntp_v6_ips, once it exists | ||
} | ||
IpAddr::V6(ip) if DNS_OPTE_IPV6_SUBNET.contains(ip) => { | ||
// TODO check existing_dns_v6_ips, once it exists | ||
} | ||
_ => { | ||
bail!( | ||
"planning input contains unexpected NIC \ | ||
(IP not contained in known OPTE subnet): {nic_entry:?}" | ||
) | ||
} | ||
} | ||
} | ||
|
||
// TODO-performance Building these iterators as "walk through the list | ||
// and skip anything we've used already" is fine as long as we're | ||
// talking about a small number of resources (e.g., single-digit number | ||
// of Nexus instances), but wouldn't be ideal if we have many resources | ||
// we need to skip. We could do something smarter here based on the sets | ||
// of used resources we built above if needed. | ||
let nexus_v4_ips = AvailableIterator::new( | ||
NEXUS_OPTE_IPV4_SUBNET | ||
.0 | ||
.iter() | ||
.skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), | ||
existing_nexus_v4_ips, | ||
); | ||
let nexus_v6_ips = AvailableIterator::new( | ||
NEXUS_OPTE_IPV6_SUBNET | ||
.0 | ||
.iter() | ||
.skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), | ||
existing_nexus_v6_ips, | ||
); | ||
let available_external_ips = AvailableIterator::new( | ||
input.service_ip_pool_ranges().iter().flat_map(|r| r.iter()), | ||
used_external_ips, | ||
); | ||
let available_system_macs = | ||
AvailableIterator::new(MacAddr::iter_system(), used_macs); | ||
|
||
Ok(Self { | ||
nexus_v4_ips, | ||
nexus_v6_ips, | ||
available_external_ips, | ||
available_system_macs, | ||
}) | ||
} | ||
|
||
pub(super) fn for_new_nexus( | ||
&mut self, | ||
) -> Result<ExternalNetworkingChoice, Error> { | ||
let external_ip = self | ||
.available_external_ips | ||
.next() | ||
.ok_or(Error::NoExternalServiceIpAvailable)?; | ||
let (nic_ip, nic_subnet) = match external_ip { | ||
IpAddr::V4(_) => ( | ||
self.nexus_v4_ips | ||
.next() | ||
.ok_or(Error::ExhaustedNexusIps)? | ||
.into(), | ||
IpNet::from(*NEXUS_OPTE_IPV4_SUBNET), | ||
), | ||
IpAddr::V6(_) => ( | ||
self.nexus_v6_ips | ||
.next() | ||
.ok_or(Error::ExhaustedNexusIps)? | ||
.into(), | ||
IpNet::from(*NEXUS_OPTE_IPV6_SUBNET), | ||
), | ||
}; | ||
let nic_mac = self | ||
.available_system_macs | ||
.next() | ||
.ok_or(Error::NoSystemMacAddressAvailable)?; | ||
|
||
Ok(ExternalNetworkingChoice { | ||
external_ip, | ||
nic_ip, | ||
nic_subnet, | ||
nic_mac, | ||
}) | ||
} | ||
} | ||
|
||
#[derive(Debug, Clone, Copy)] | ||
pub(super) struct ExternalNetworkingChoice { | ||
pub(super) external_ip: IpAddr, | ||
pub(super) nic_ip: IpAddr, | ||
pub(super) nic_subnet: IpNet, | ||
pub(super) nic_mac: MacAddr, | ||
} | ||
|
||
/// Combines a base iterator with an `in_use` set, filtering out any elements | ||
/// that are in the "in_use" set. | ||
/// | ||
/// This can be done with a chained `.filter` on the iterator, but | ||
/// `AvailableIterator` also allows for inspection of the `in_use` set. | ||
/// | ||
/// Note that this is a stateful iterator -- i.e. it implements `Iterator`, not | ||
/// `IntoIterator`. That's what we currently need in the planner. | ||
#[derive(Debug)] | ||
struct AvailableIterator<'a, T> { | ||
base: DebugIgnore<Box<dyn Iterator<Item = T> + Send + 'a>>, | ||
in_use: HashSet<T>, | ||
} | ||
|
||
impl<'a, T: Hash + Eq> AvailableIterator<'a, T> { | ||
/// Creates a new `AvailableIterator` from a base iterator and a set of | ||
/// elements that are in use. | ||
fn new<I>(base: I, in_use: impl IntoIterator<Item = T>) -> Self | ||
where | ||
I: Iterator<Item = T> + Send + 'a, | ||
{ | ||
let in_use = in_use.into_iter().collect(); | ||
AvailableIterator { base: DebugIgnore(Box::new(base)), in_use } | ||
} | ||
} | ||
|
||
impl<T: Hash + Eq> Iterator for AvailableIterator<'_, T> { | ||
type Item = T; | ||
|
||
fn next(&mut self) -> Option<T> { | ||
self.base.find(|item| !self.in_use.contains(item)) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
pub mod test { | ||
use super::*; | ||
use test_strategy::proptest; | ||
|
||
/// Test that `AvailableIterator` correctly filters out items that are in | ||
/// use. | ||
#[proptest] | ||
fn test_available_iterator(items: HashSet<(i32, bool)>) { | ||
let mut in_use_map = HashSet::new(); | ||
let mut expected_available = Vec::new(); | ||
let items: Vec<_> = items | ||
.into_iter() | ||
.map(|(item, in_use)| { | ||
if in_use { | ||
in_use_map.insert(item); | ||
} else { | ||
expected_available.push(item); | ||
} | ||
item | ||
}) | ||
.collect(); | ||
|
||
let available = AvailableIterator::new(items.into_iter(), in_use_map); | ||
let actual_available = available.collect::<Vec<_>>(); | ||
|
||
assert_eq!( | ||
expected_available, actual_available, | ||
"available items match" | ||
); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.