Skip to content

Commit

Permalink
Network: Switch to long-lived OVN ports (#13900)
Browse files Browse the repository at this point in the history
- [x] Convert OVN logical switch ports and associated config to be
created at NIC create time and deleted at NIC delete time (rather than
start and stop time respectively).
- [x] Update tests to reflect this. See
canonical/lxd-ci#261
- [x] Add DB patch to clear out `volatile.*last_state.ip_addresses`
instance config entries as no longer needed.
- [x] Explore if we can have a network level option to allow the OVN
dynamic IPs to be refreshed on start and cleared on stop to allow the
old dynamic behaviour as well, otherwise it won't be possible to create
more instances than available IPs, even if not all of them are running
at the same time.
- [x] Test live VM migration works - tested using both ceph and zfs
migrations.

Confirmed that creating an OVN NIC when there is insufficient space
causes an error:

```
Error: Failed instance creation: Failed creating instance record: Failed initialising instance: Failed to add device "eth0": Failed setting up OVN port: Insufficient dynamic addresses allocated
```

WRT to exploring if we can later add a network option to allow dynamic
allocation at start time rather than create time, this should be
possible to do by not setting the `dynamic` option when we create the
port, but instead setting it at start time and removing it at stop time.
**But will not add this functionality in this PR.**

E.g.

```
sudo ovn-nbctl find logical_switch_port
addresses           : ["00:16:3e:24:9c:4c"]
dhcpv4_options      : 592a6788-4c73-4773-a8fa-12808377eccf
dhcpv6_options      : 52436e24-f9d1-4436-9de4-96a264739aed
dynamic_addresses   : []
enabled             : []
external_ids        : {lxd_location=none, lxd_switch=lxd-net14-ls-int}
ha_chassis_group    : []
name                : lxd-net14-instance-3dd0d44c-f72f-4981-b04c-bdf6414e0bba-eth0
options             : {}
parent_name         : []
port_security       : []
tag                 : []
tag_request         : []
type                : ""
up                  : false
```

And then:

```
sudo ovn-nbctl lsp-set-addresses lxd-net14-instance-3dd0d44c-f72f-4981-b04c-bdf6414e0bba-eth0 00:16:3e:24:9c:4c dynamic
```

Will result in dynamic IPs being allocated:

```
addresses           : ["00:16:3e:24:9c:4c", dynamic]
dynamic_addresses   : "be:af:33:b8:e5:03 10.184.229.2 fd42:843c:cb92:3d28:bcaf:33ff:feb8:e503"
```

Then removing `dynamic`:

```
sudo ovn-nbctl lsp-set-addresses lxd-net14-instance-3dd0d44c-f72f-4981-b04c-bdf6414e0bba-eth0 00:16:3e:24:9c:4c
```

Causes the dynamic IPs to be removed too:

```
addresses           : ["00:16:3e:24:9c:4c"]
dynamic_addresses   : []
```

It would also need the DNS setup to be delayed until NIC start time and
have it torn down at NIC stop time as otherwise we won't have IPs to
associate to the DNS name.
  • Loading branch information
tomponline authored Aug 27, 2024
2 parents 68e1b1b + c160499 commit 9a759f9
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 202 deletions.
6 changes: 0 additions & 6 deletions doc/metadata.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2432,12 +2432,6 @@ Possible values are `true` or `false`.
The original MAC that was used when moving a physical device into an instance.
```

```{config:option} volatile.<name>.last_state.ip_addresses instance-volatile
:shortdesc: "Last used IP addresses"
:type: "string"
Comma-separated list of the last used IP addresses of the network device.
```

```{config:option} volatile.<name>.last_state.mtu instance-volatile
:shortdesc: "Network device original MTU"
:type: "string"
Expand Down
103 changes: 38 additions & 65 deletions lxd/device/nic_ovn.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ type ovnNet interface {

InstanceDevicePortValidateExternalRoutes(deviceInstance instance.Instance, deviceName string, externalRoutes []*net.IPNet) error
InstanceDevicePortAdd(instanceUUID string, deviceName string, deviceConfig deviceConfig.Device) error
InstanceDevicePortStart(opts *network.OVNInstanceNICSetupOpts, securityACLsRemove []string) (openvswitch.OVNSwitchPort, []net.IP, error)
InstanceDevicePortStop(ovsExternalOVNPort openvswitch.OVNSwitchPort, opts *network.OVNInstanceNICStopOpts) error
InstanceDevicePortStart(opts *network.OVNInstanceNICSetupOpts, securityACLsRemove []string) (openvswitch.OVNSwitchPort, error)
InstanceDevicePortRemove(instanceUUID string, deviceName string, deviceConfig deviceConfig.Device) error
InstanceDevicePortIPs(instanceUUID string, deviceName string) ([]net.IP, error)
}
Expand Down Expand Up @@ -344,7 +343,41 @@ func (d *nicOVN) checkAddressConflict() error {

// Add is run when a device is added to a non-snapshot instance whether or not the instance is running.
func (d *nicOVN) Add() error {
return d.network.InstanceDevicePortAdd(d.inst.LocalConfig()["volatile.uuid"], d.name, d.config)
networkVethFillFromVolatile(d.config, d.volatileGet())

// Load uplink network config.
uplinkNetworkName := d.network.Config()["network"]

var err error
var uplink *api.Network

err = d.state.DB.Cluster.Transaction(context.TODO(), func(ctx context.Context, tx *db.ClusterTx) error {
_, uplink, _, err = tx.GetNetworkInAnyState(ctx, api.ProjectDefaultName, uplinkNetworkName)

return err
})
if err != nil {
return fmt.Errorf("Failed to load uplink network %q: %w", uplinkNetworkName, err)
}

err = d.network.InstanceDevicePortAdd(d.inst.LocalConfig()["volatile.uuid"], d.name, d.config)
if err != nil {
return err
}

// Add new OVN logical switch port for instance.
_, err = d.network.InstanceDevicePortStart(&network.OVNInstanceNICSetupOpts{
InstanceUUID: d.inst.LocalConfig()["volatile.uuid"],
DNSName: d.inst.Name(),
DeviceName: d.name,
DeviceConfig: d.config,
UplinkConfig: uplink.Config,
}, nil)
if err != nil {
return fmt.Errorf("Failed setting up OVN port: %w", err)
}

return nil
}

// PreStartCheck checks the managed parent network is available (if relevant).
Expand Down Expand Up @@ -555,53 +588,18 @@ func (d *nicOVN) Start() (*deviceConfig.RunConfig, error) {
// Populate device config with volatile fields if needed.
networkVethFillFromVolatile(d.config, saveData)

v := d.volatileGet()

// Retrieve any last state IPs from volatile and pass them to OVN driver for potential use with sticky
// DHCPv4 allocations.
var lastStateIPs []net.IP
for _, ipStr := range shared.SplitNTrimSpace(v["last_state.ip_addresses"], ",", -1, true) {
lastStateIP := net.ParseIP(ipStr)
if lastStateIP != nil {
lastStateIPs = append(lastStateIPs, lastStateIP)
}
}

// Add new OVN logical switch port for instance.
logicalPortName, dnsIPs, err := d.network.InstanceDevicePortStart(&network.OVNInstanceNICSetupOpts{
logicalPortName, err := d.network.InstanceDevicePortStart(&network.OVNInstanceNICSetupOpts{
InstanceUUID: d.inst.LocalConfig()["volatile.uuid"],
DNSName: d.inst.Name(),
DeviceName: d.name,
DeviceConfig: d.config,
UplinkConfig: uplink.Config,
LastStateIPs: lastStateIPs, // Pass in volatile last state IPs for use with sticky DHCPv4 hint.
}, nil)
if err != nil {
return nil, fmt.Errorf("Failed setting up OVN port: %w", err)
}

// Record switch port DNS IPs to volatile so they can be used as sticky DHCPv4 hint in the future in order
// to allocate the same IPs on next start if they are still available/appropriate.
// This volatile key will not be removed when instance stops.
var dnsIPsStr strings.Builder
for i, dnsIP := range dnsIPs {
if i > 0 {
dnsIPsStr.WriteString(",")
}

dnsIPsStr.WriteString(dnsIP.String())
}

saveData["last_state.ip_addresses"] = dnsIPsStr.String()

revert.Add(func() {
_ = d.network.InstanceDevicePortStop("", &network.OVNInstanceNICStopOpts{
InstanceUUID: d.inst.LocalConfig()["volatile.uuid"],
DeviceName: d.name,
DeviceConfig: d.config,
})
})

// Associated host side interface to OVN logical switch port (if not nested).
if integrationBridgeNICName != "" {
cleanup, err := d.setupHostNIC(integrationBridgeNICName, logicalPortName)
Expand Down Expand Up @@ -760,7 +758,7 @@ func (d *nicOVN) Update(oldDevices deviceConfig.Devices, isRunning bool) error {
}

// Update OVN logical switch port for instance.
_, _, err = d.network.InstanceDevicePortStart(&network.OVNInstanceNICSetupOpts{
_, err = d.network.InstanceDevicePortStart(&network.OVNInstanceNICSetupOpts{
InstanceUUID: d.inst.LocalConfig()["volatile.uuid"],
DNSName: d.inst.Name(),
DeviceName: d.name,
Expand Down Expand Up @@ -835,19 +833,8 @@ func (d *nicOVN) Stop() (*deviceConfig.RunConfig, error) {

var err error

// Try and retrieve the last associated OVN switch port for the instance interface in the local OVS DB.
// If we cannot get this, don't fail, as InstanceDevicePortStop will then try and generate the likely
// port name using the same regime it does for new ports. This part is only here in order to allow
// instance ports generated under an older regime to be cleaned up properly.
networkVethFillFromVolatile(d.config, v)
ovs := openvswitch.NewOVS()
var ovsExternalOVNPort openvswitch.OVNSwitchPort
if d.config["nested"] == "" {
ovsExternalOVNPort, err = ovs.InterfaceAssociatedOVNSwitchPort(d.config["host_name"])
if err != nil {
d.logger.Warn("Could not find OVN Switch port associated to OVS interface", logger.Ctx{"interface": d.config["host_name"]})
}
}

integrationBridgeNICName := d.config["host_name"]
if d.config["acceleration"] == "sriov" || d.config["acceleration"] == "vdpa" {
Expand All @@ -872,20 +859,6 @@ func (d *nicOVN) Stop() (*deviceConfig.RunConfig, error) {
}
}

// If the devices config is invalid validateConfig() won't populate this field.
if d.network != nil {
instanceUUID := d.inst.LocalConfig()["volatile.uuid"]
err = d.network.InstanceDevicePortStop(ovsExternalOVNPort, &network.OVNInstanceNICStopOpts{
InstanceUUID: instanceUUID,
DeviceName: d.name,
DeviceConfig: d.config,
})
if err != nil {
// Don't fail here as we still want the postStop hook to run to clean up the local veth pair.
d.logger.Error("Failed to remove OVN device port", logger.Ctx{"err": err})
}
}

// Remove BGP announcements.
err = bgpRemovePrefix(&d.deviceCommon, d.config)
if err != nil {
Expand Down
9 changes: 0 additions & 9 deletions lxd/instance/instancetype/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -1179,15 +1179,6 @@ func ConfigKeyChecker(key string, instanceType Type) (func(value string) error,
return validate.IsAny, nil
}

// lxdmeta:generate(entities=instance; group=volatile; key=volatile.<name>.last_state.ip_addresses)
// Comma-separated list of the last used IP addresses of the network device.
// ---
// type: string
// shortdesc: Last used IP addresses
if strings.HasSuffix(key, ".last_state.ip_addresses") {
return validate.IsListOf(validate.IsNetworkAddress), nil
}

// lxdmeta:generate(entities=instance; group=volatile; key=volatile.<name>.apply_quota)
// The disk quota is applied the next time the instance starts.
// ---
Expand Down
7 changes: 0 additions & 7 deletions lxd/metadata/configuration.json
Original file line number Diff line number Diff line change
Expand Up @@ -2746,13 +2746,6 @@
"type": "string"
}
},
{
"volatile.\u003cname\u003e.last_state.ip_addresses": {
"longdesc": "Comma-separated list of the last used IP addresses of the network device.",
"shortdesc": "Last used IP addresses",
"type": "string"
}
},
{
"volatile.\u003cname\u003e.last_state.mtu": {
"longdesc": "The original MTU that was used when moving a physical device into an instance.",
Expand Down
Loading

0 comments on commit 9a759f9

Please sign in to comment.