diff --git a/felix/bpf/ut/attach_test.go b/felix/bpf/ut/attach_test.go index 4e9f22018bd..71782c250d1 100644 --- a/felix/bpf/ut/attach_test.go +++ b/felix/bpf/ut/attach_test.go @@ -75,7 +75,7 @@ func runAttachTest(t *testing.T, ipv6Enabled bool) { ) Expect(err).NotTo(HaveOccurred()) - host1 := createVethName("hostep1") + host1 := createHostIf("hostep1") defer deleteLink(host1) workload0 := createVethName("workloadep0") @@ -278,7 +278,7 @@ func runAttachTest(t *testing.T, ipv6Enabled bool) { Expect(xdpProgs).To(HaveLen(0)) }) - host2 := createVethName("hostep2") + host2 := createHostIf("hostep2") defer deleteLink(host2) t.Run("create another host interface without a host endpoint (no policy)", func(t *testing.T) { @@ -848,7 +848,7 @@ func TestLogFilters(t *testing.T) { ) Expect(err).NotTo(HaveOccurred()) - host1 := createVethName("hostep1") + host1 := createHostIf("hostep1") defer deleteLink(host1) workload0 := createVethName("workloadep0") diff --git a/felix/bpf/ut/precompilation_test.go b/felix/bpf/ut/precompilation_test.go index 319b73b2a08..daa7a86872a 100644 --- a/felix/bpf/ut/precompilation_test.go +++ b/felix/bpf/ut/precompilation_test.go @@ -94,6 +94,18 @@ func createVethName(name string) netlink.Link { return veth } +func createHostIf(name string) netlink.Link { + la := netlink.NewLinkAttrs() + la.Name = name + la.Flags = net.FlagUp + var hostIf netlink.Link = &netlink.Dummy{ + LinkAttrs: la, + } + err := netlink.LinkAdd(hostIf) + ExpectWithOffset(1, err).NotTo(HaveOccurred(), fmt.Sprintf("failed to create test hostIf: %q", name)) + return hostIf +} + func deleteLink(veth netlink.Link) { err := netlink.LinkDel(veth) Expect(err).NotTo(HaveOccurred(), "failed to delete test veth") diff --git a/felix/dataplane/linux/bpf_ep_mgr.go b/felix/dataplane/linux/bpf_ep_mgr.go index e2bbfd512e3..45891faf3e9 100644 --- a/felix/dataplane/linux/bpf_ep_mgr.go +++ b/felix/dataplane/linux/bpf_ep_mgr.go @@ -212,6 +212,15 @@ var zeroIface bpfInterface = func() bpfInterface { 
return i }() +type bpfHostIface struct { + name string + index int + masterIndex int + parentIndex int + parentIface *bpfHostIface + children map[int]*bpfHostIface +} + type bpfInterfaceInfo struct { ifIndex int isUP bool @@ -305,6 +314,7 @@ type bpfEndpointManager struct { ifStateMap *cachingmap.CachingMap[ifstate.Key, ifstate.Value] removeOldJumps bool legacyCleanUp bool + hostIfaces map[int]*bpfHostIface jumpMapAlloc *jumpMapAlloc xdpJumpMapAlloc *jumpMapAlloc @@ -331,10 +341,9 @@ type bpfEndpointManager struct { updatePolicyProgramFn func(rules polprog.Rules, polDir string, ap attachPoint, ipFamily proto.IPVersion) error // HEP processing. - hostIfaceToEpMap map[string]proto.HostEndpoint - wildcardHostEndpoint proto.HostEndpoint - wildcardExists bool - hostIfaceToSlaveDevices map[string]set.Set[string] + hostIfaceToEpMap map[string]proto.HostEndpoint + wildcardHostEndpoint proto.HostEndpoint + wildcardExists bool // UT-able BPF dataplane interface. dp bpfDataplane @@ -482,6 +491,7 @@ func newBPFEndpointManager( policiesToWorkloads: map[proto.PolicyID]set.Set[any]{}, profilesToWorkloads: map[proto.ProfileID]set.Set[any]{}, dirtyIfaceNames: set.New[string](), + hostIfaces: make(map[int]*bpfHostIface), bpfLogLevel: config.BPFLogLevel, logFilters: config.BPFLogFilters, hostname: config.Hostname, @@ -506,13 +516,12 @@ func newBPFEndpointManager( // Note: the allocators only allocate a fraction of the map, the // rest is reserved for sub-programs generated if a single program // would be too large. 
- jumpMapAlloc: newJumpMapAlloc(jump.TCMaxEntryPoints), - xdpJumpMapAlloc: newJumpMapAlloc(jump.XDPMaxEntryPoints), - ruleRenderer: iptablesRuleRenderer, - onStillAlive: livenessCallback, - hostIfaceToEpMap: map[string]proto.HostEndpoint{}, - hostIfaceToSlaveDevices: map[string]set.Set[string]{}, - opReporter: opReporter, + jumpMapAlloc: newJumpMapAlloc(jump.TCMaxEntryPoints), + xdpJumpMapAlloc: newJumpMapAlloc(jump.XDPMaxEntryPoints), + ruleRenderer: iptablesRuleRenderer, + onStillAlive: livenessCallback, + hostIfaceToEpMap: map[string]proto.HostEndpoint{}, + opReporter: opReporter, // ipv6Enabled Should be set to config.Ipv6Enabled, but for now it is better // to set it to BPFIpv6Enabled which is a dedicated flag for development of IPv6. // TODO: set ipv6Enabled to config.Ipv6Enabled when IPv6 support is complete @@ -1055,38 +1064,46 @@ func (m *bpfEndpointManager) deleteIfaceCounters(name string, ifindex int) { log.Debugf("Deleted counters for dev %s ifindex %d.", name, ifindex) } -func (m *bpfEndpointManager) cleanupOldAttach(iface string, ai bpf.EPAttachInfo) error { - if ai.XDP != 0 { - ap := xdp.AttachPoint{ - AttachPoint: bpf.AttachPoint{ - Iface: iface, - Hook: hook.XDP, - }, - // Try all modes in this order - Modes: []bpf.XDPMode{bpf.XDPGeneric, bpf.XDPDriver, bpf.XDPOffload}, - } +func (m *bpfEndpointManager) cleanupOldXDPAttach(iface string) error { + ap := xdp.AttachPoint{ + AttachPoint: bpf.AttachPoint{ + Iface: iface, + Hook: hook.XDP, + }, + Modes: []bpf.XDPMode{bpf.XDPGeneric, bpf.XDPDriver, bpf.XDPOffload}, + } + if err := m.dp.ensureNoProgram(&ap); err != nil { + return fmt.Errorf("xdp: %w", err) + } + return nil +} - if err := m.dp.ensureNoProgram(&ap); err != nil { - return fmt.Errorf("xdp: %w", err) - } +func (m *bpfEndpointManager) cleanupOldTcAttach(iface string) error { + ap := tc.AttachPoint{ + AttachPoint: bpf.AttachPoint{ + Iface: iface, + Hook: hook.Egress, + }, } - if ai.Ingress != 0 || ai.Egress != 0 { - ap := tc.AttachPoint{ - 
AttachPoint: bpf.AttachPoint{ - Iface: iface, - Hook: hook.Egress, - }, - } - if err := m.dp.ensureNoProgram(&ap); err != nil { - return fmt.Errorf("tc egress: %w", err) - } + if err := m.dp.ensureNoProgram(&ap); err != nil { + return fmt.Errorf("tc egress: %w", err) + } - ap.Hook = hook.Ingress + ap.Hook = hook.Ingress - if err := m.dp.ensureNoProgram(&ap); err != nil { - return fmt.Errorf("tc ingress: %w", err) - } + if err := m.dp.ensureNoProgram(&ap); err != nil { + return fmt.Errorf("tc ingress: %w", err) + } + return nil +} + +func (m *bpfEndpointManager) cleanupOldAttach(iface string, ai bpf.EPAttachInfo) error { + if ai.XDP != 0 { + return m.cleanupOldXDPAttach(iface) + } + if ai.Ingress != 0 || ai.Egress != 0 { + return m.cleanupOldTcAttach(iface) } return nil @@ -1123,86 +1140,32 @@ func (m *bpfEndpointManager) onInterfaceUpdate(update *ifaceStateUpdate) { } masterIfIndex := 0 - prevMasterIfIndex := 0 curIfaceType := IfaceTypeUnknown - prevIfaceType := IfaceTypeUnknown - if val, ok := m.nameToIface[update.Name]; ok { - prevIfaceType = val.info.ifaceType - prevMasterIfIndex = val.info.masterIfIndex - } - - if update.State != ifacemonitor.StateNotPresent && !m.isWorkloadIface(update.Name) { - // Determine the type of interface. - // These include host, bond, slave, ipip, wireguard, l3. - // update the ifaceType, master ifindex if bond slave. - link, err := m.dp.getIfaceLink(update.Name) - if err != nil { - log.Errorf("Failed to get interface information via netlink '%s'", update.Name) - curIfaceType = IfaceTypeL3 - if m.isDataIface(update.Name) { - curIfaceType = IfaceTypeData - } - } else { - curIfaceType = m.getIfaceTypeFromLink(link) - masterIfIndex = link.Attrs().MasterIndex - } - if prevIfaceType != curIfaceType { - if curIfaceType == IfaceTypeBondSlave { - // Remove the Tc program. 
- ai, err := bpf.ListCalicoAttached() - if err != nil { - log.WithError(err).Warn("Failed to list attached programs") - } else { - if err := m.cleanupOldAttach(update.Name, ai[update.Name]); err != nil { - log.WithError(err).Warnf("Failed to detach old programs from now bonding device '%s'", update.Name) - } - } - } else if curIfaceType == IfaceTypeBond { - // create an entry in the hostIfaceToSlaveDevices - if _, ok := m.hostIfaceToSlaveDevices[update.Name]; !ok { - m.hostIfaceToSlaveDevices[update.Name] = set.New[string]() - } - } - } - } - - // Manage bond slaves. - if !m.isWorkloadIface(update.Name) && curIfaceType != prevIfaceType { - if curIfaceType == IfaceTypeBondSlave { - /* Interface has been added to a bond. - * Add this interface to the list of slave devices - * of the master. - */ - netiface, err := m.dp.interfaceByIndex(masterIfIndex) + if !m.isWorkloadIface(update.Name) { + if update.State != ifacemonitor.StateNotPresent { + // Determine the type of interface. + // These include host, bond, slave, ipip, wireguard, l3. + // update the ifaceType, master ifindex if bond slave. + link, err := m.dp.getIfaceLink(update.Name) if err != nil { - log.WithError(err).Warn("Failed to get master interface name. Slave devices not updated") - } else { - // Slave interface update comes first. - if _, ok := m.hostIfaceToSlaveDevices[netiface.Name]; !ok { - m.hostIfaceToSlaveDevices[netiface.Name] = set.New[string]() + log.Errorf("Failed to get interface information via netlink '%s'", update.Name) + curIfaceType = IfaceTypeL3 + if m.isDataIface(update.Name) { + curIfaceType = IfaceTypeData } - m.hostIfaceToSlaveDevices[netiface.Name].Add(update.Name) + } else { + m.addHostIface(link) + curIfaceType = m.getIfaceTypeFromLink(link) + masterIfIndex = link.Attrs().MasterIndex + // Mark all the interfaces in the tree dirty, so that program can be attached/removed. 
+ m.dirtyIfaceNames.AddSet(m.getAllIfacesInTree(update.Name)) } } else { - /* Interface is either removed from the bond or deleted. - * In such cases, remove the interface from the list of slave devices. - */ - if prevIfaceType == IfaceTypeBondSlave { - netiface, err := m.dp.interfaceByIndex(prevMasterIfIndex) - if err != nil { - log.WithError(err).Warn("Failed to get master interface name. Slave devices not updated") - } else { - if _, ok := m.hostIfaceToSlaveDevices[netiface.Name]; ok { - m.hostIfaceToSlaveDevices[netiface.Name].Discard(update.Name) - } - } - } - /* Interface is not a bond anymore. Remove the interface - * from the map. - */ - if prevIfaceType == IfaceTypeBond { - delete(m.hostIfaceToSlaveDevices, update.Name) - } + allIfaces := m.getAllIfacesInTree(update.Name) + // Mark all the interfaces in the tree dirty, so that program can be attached/removed. + allIfaces.Discard(update.Name) + m.deleteHostIface(update.Name) + m.dirtyIfaceNames.AddSet(allIfaces) } } @@ -1917,45 +1880,90 @@ func (m *bpfEndpointManager) applyProgramsToDirtyDataInterfaces() { return nil } xdpMode := XDPModeAll + attachTc := true masterName := "" - m.ifacesLock.Lock() - val, ok := m.nameToIface[iface] - m.ifacesLock.Unlock() - if ok { - if val.info.ifaceType == IfaceTypeBondSlave { - // Check if the master device matches the regex. - // If it does, ignore slave devices. If not, - // throw a warning and continue to attach to slave. - masterIfa, err := m.dp.interfaceByIndex(val.info.masterIfIndex) - if err != nil { - log.Warnf("Failed to get master interface details for '%s'. Continuing to attach program", iface) - } else { - masterName = masterIfa.Name - if !m.isDataIface(masterIfa.Name) { - log.Warnf("Master interface '%s' ignored. Add it to the bpfDataIfacePattern config", masterIfa.Name) + + /* If the interface is not found either in netlink or the host iface tree, attach Tc and XDP. 
+ * If the interface is a root interface, and not a leaf interface, attach only Tc and cleanup XDP. + * If the interface is both a root and leaf interface, attach both Tc and XDP. + * If the interface is a leaf interface, attach only XDP and cleanup Tc. + * If the interface is neither a root nor a leaf, cleanup both Tc and XDP. + */ + link, err := m.dp.getIfaceLink(iface) + if err != nil { + log.WithField("iface", iface).Debug( + "Error getting link") + } else { + hostIf := m.findHostIfaceByIndex(link.Attrs().Index) + if hostIf == nil { + log.WithField("iface", iface).Debug( + "Host Iface not found in tree") + } else { + isRoot := isRootIface(hostIf) + isLeaf := isLeafIface(hostIf) + if isRoot { + // Root interface and not a leaf. Attach only Tc. + // set xdp mode to None and remove any previously attached + // xdp programs. + if !isLeaf { + xdpMode = XDPModeNone + } + // Root and leaf. Single interface tree. Attach both Tc and XDP. + } else if isLeaf { + masterIfa := m.getRootInterface(hostIf) + if err != nil { + log.Warnf("Failed to get master interface details for '%s'. Continuing to attach program", iface) } else { - log.WithField("iface", iface).Debug( - "Attaching xdp only") - xdpMode = XDPModeOnly + masterName = masterIfa.name + if !m.isDataIface(masterName) { + log.Warnf("Master interface '%s' ignored. Add it to the bpfDataIfacePattern config", masterName) + } else { + log.WithField("iface", iface).Debug( + "Attaching xdp only") + xdpMode = XDPModeOnly + attachTc = false + } } + } else { + xdpMode = XDPModeNone + attachTc = false } } - if val.info.ifaceType == IfaceTypeBond { - xdpMode = XDPModeNone + } + + if !attachTc { + // Remove any previously attached Tc program. + err := m.cleanupOldTcAttach(iface) + if err != nil { + log.Warnf("error removing old Tc program from '%s'.", iface) + } + } + + if xdpMode == XDPModeNone { + log.Debugf("Attaching only Tc programs to %s", iface) + // Remove any previously attached XDP program. 
+ err = m.cleanupOldXDPAttach(iface) + if err != nil { + log.Warnf("error removing old xdp program from '%s'.", iface) } } + m.opReporter.RecordOperation("update-data-iface") wg.Add(1) go func(ifaceName string) { + var state bpfInterfaceState + var err error defer wg.Done() - state, err := m.doApplyPolicyToDataIface(ifaceName, masterName, xdpMode) - m.ifacesLock.Lock() - m.withIface(ifaceName, func(bpfIface *bpfInterface) bool { - bpfIface.dpState = state - return false - }) - m.ifacesLock.Unlock() + if xdpMode != XDPModeNone || attachTc { + state, err = m.doApplyPolicyToDataIface(ifaceName, masterName, xdpMode) + m.ifacesLock.Lock() + m.withIface(ifaceName, func(bpfIface *bpfInterface) bool { + bpfIface.dpState = state + return false + }) + m.ifacesLock.Unlock() + } if err == nil { // This is required to allow NodePort forwarding with // encapsulation with the host's IP as the source address @@ -2181,7 +2189,6 @@ func (m *bpfEndpointManager) wepStateFillJumps(ap *tc.AttachPoint, state *bpfInt func (m *bpfEndpointManager) dataIfaceStateFillJumps(ap *tc.AttachPoint, xdpMode XDPMode, state *bpfInterfaceState) error { var err error - if m.v4 != nil { err = m.allocJumpIndicesForDataIface(ap.IfaceName(), xdpMode, &state.v4) if err != nil { @@ -4201,10 +4208,246 @@ func (m *bpfEndpointManager) getIfaceTypeFromLink(link netlink.Link) IfaceType { } func (m *bpfEndpointManager) getBondSlaves(masterIfName string) set.Set[string] { - if val, ok := m.hostIfaceToSlaveDevices[masterIfName]; ok { + slaveDevices := set.New[string]() + link, err := m.dp.getIfaceLink(masterIfName) + if err != nil { + log.Errorf("Error getting link for interface %s, err = %v", masterIfName, err) + return slaveDevices + } + hostIf := m.findHostIfaceByIndex(link.Attrs().Index) + if hostIf == nil { + log.Errorf("error finding interface %s", masterIfName) + return slaveDevices + } + + if len(hostIf.children) > 0 { + leaves := []string{} + getLeafNodes(hostIf, &leaves) + slaveDevices.AddAll(leaves) + } + 
return slaveDevices +} + +// addHostIface adds host interface to hostIfaces tree. +func (m *bpfEndpointManager) addHostIface(link netlink.Link) { + attrs := link.Attrs() + intf := &bpfHostIface{name: attrs.Name, + index: attrs.Index, + masterIndex: attrs.MasterIndex, + parentIndex: attrs.ParentIndex, + children: make(map[int]*bpfHostIface)} + + if attrs.MasterIndex == 0 && attrs.ParentIndex == 0 { + eintf := m.findHostIfaceByIndex(attrs.Index) + if eintf != nil { + eintf.name = intf.name + eintf.index = intf.index + eintf.masterIndex = intf.masterIndex + eintf.parentIndex = intf.parentIndex + } + if val, exists := m.hostIfaces[attrs.Index]; !exists { + m.hostIfaces[attrs.Index] = intf + } else { + val.name = intf.name + val.index = intf.index + val.masterIndex = intf.masterIndex + val.parentIndex = intf.parentIndex + } + } else if attrs.MasterIndex != 0 { + m.deleteHostIface(intf.name) + // If the master interface is in the tree, add it as a slave. + if val, exists := m.hostIfaces[attrs.MasterIndex]; exists { + intf.parentIface = val + val.children[attrs.Index] = intf + m.hostIfaces[attrs.MasterIndex] = val + } else { + // Need to search slave devices. 
+ masterIf := m.findHostIfaceByIndex(attrs.MasterIndex) + if masterIf != nil { + intf.parentIface = masterIf + masterIf.children[attrs.Index] = intf + } else { + masterIface := &bpfHostIface{index: attrs.MasterIndex, children: make(map[int]*bpfHostIface)} + intf.parentIface = masterIface + masterIface.children[attrs.Index] = intf + m.hostIfaces[attrs.MasterIndex] = masterIface + } + } + delete(m.hostIfaces, attrs.Index) + } else if attrs.ParentIndex != 0 { + // bond0 already in list + if val, exists := m.hostIfaces[attrs.ParentIndex]; exists { + val.parentIface = intf + intf.children[val.index] = val + delete(m.hostIfaces, attrs.ParentIndex) + } else { + intf.children[attrs.ParentIndex] = &bpfHostIface{index: attrs.ParentIndex, + parentIface: intf, + children: make(map[int]*bpfHostIface)} + } + m.hostIfaces[attrs.Index] = intf + } +} + +func findInIfaceTree(parent *bpfHostIface, index int, name string, byIndex bool) (*bpfHostIface, *bpfHostIface) { + if parent == nil { + return nil, nil + } + for _, child := range parent.children { + if byIndex { + if child.index == index { + return child, parent + } + } else { + if child.name == name { + return child, parent + } + } + node, parent := findInIfaceTree(child, index, name, byIndex) + if node != nil { + return node, parent + } + } + return nil, nil +} + +func deleteIfaceFromTree(intf *bpfHostIface, name string) (map[int]*bpfHostIface, bool) { + ret := make(map[int]*bpfHostIface) + // delete root node. + if intf.name == name { + for k, v := range intf.children { + v.parentIface = nil + ret[k] = v + } + return ret, true + } + + child, parent := findInIfaceTree(intf, -1, name, false) + if child != nil { + // leaf node. 
+ if len(child.children) == 0 { + delete(parent.children, child.index) + } else { + for k, v := range child.children { + v.parentIface = nil + v.masterIndex = 0 + ret[k] = v + } + return ret, true + } + } + return ret, false +} + +func (m *bpfEndpointManager) deleteHostIface(name string) { + newKeys := make(map[int]*bpfHostIface) + for k, intf := range m.hostIfaces { + ret, toDelete := deleteIfaceFromTree(intf, name) + for k1, v1 := range ret { + newKeys[k1] = v1 + } + if toDelete { + delete(m.hostIfaces, k) + } + } + for k, v := range newKeys { + m.hostIfaces[k] = v + } +} + +func (m *bpfEndpointManager) getRootInterface(hostIf *bpfHostIface) *bpfHostIface { + temp := hostIf + for { + if temp.parentIface == nil { + return temp + } + temp = temp.parentIface + } +} + +// getAllIfacesInTree returns all the interface names in the tree as a set. +func (m *bpfEndpointManager) getAllIfacesInTree(name string) set.Set[string] { + allIfaces := set.New[string]() + hostIf := m.findHostIfaceByName(name) + if hostIf == nil { + return allIfaces + } + root := m.getRootInterface(hostIf) + if root == nil { + return allIfaces + } + nodes := []string{} + getAllIfaces(root, &nodes) + allIfaces.AddAll(nodes) + return allIfaces +} + +// findHostIfaceByIndex returns the bpfHostIface if present matching the index. +func (m *bpfEndpointManager) findHostIfaceByIndex(index int) *bpfHostIface { + val, exists := m.hostIfaces[index] + if exists { return val } - return set.New[string]() + for _, iface := range m.hostIfaces { + slaveIf, _ := findInIfaceTree(iface, index, "", true) + if slaveIf != nil { + return slaveIf + } + } + return nil +} + +// findHostIfaceByName returns the bpfHostIface if present matching the name. 
+func (m *bpfEndpointManager) findHostIfaceByName(name string) *bpfHostIface { + for _, val := range m.hostIfaces { + if val.name == name { + return val + } + } + for _, iface := range m.hostIfaces { + slaveIf, _ := findInIfaceTree(iface, -1, name, false) + if slaveIf != nil { + return slaveIf + } + } + return nil +} + +func getAllIfaces(root *bpfHostIface, nodes *[]string) { + if root == nil { + return + } + *nodes = append(*nodes, root.name) + for _, child := range root.children { + getAllIfaces(child, nodes) + } +} + +func isLeafIface(hostIf *bpfHostIface) bool { + for _, child := range hostIf.children { + if child.name != "" { + return false + } + } + return true +} + +func isRootIface(hostIf *bpfHostIface) bool { + return hostIf.parentIface == nil +} + +// getLeafNodes returns the list of leaf nodes, given +// any node in the tree. +func getLeafNodes(intf *bpfHostIface, leaves *[]string) { + if intf == nil { + return + } + if len(intf.children) == 0 { + *leaves = append(*leaves, intf.name) + } + for _, child := range intf.children { + getLeafNodes(child, leaves) + } } func newJumpMapAlloc(entryPoints int) *jumpMapAlloc { diff --git a/felix/dataplane/linux/bpf_ep_mgr_test.go b/felix/dataplane/linux/bpf_ep_mgr_test.go index 435ef476dbd..f1b1f538592 100644 --- a/felix/dataplane/linux/bpf_ep_mgr_test.go +++ b/felix/dataplane/linux/bpf_ep_mgr_test.go @@ -215,6 +215,19 @@ func (m *mockDataplane) createIface(name string, index int, linkType string) err return m.netlinkShim.LinkAdd(&iface) } +func (m *mockDataplane) createVlanIface(name string, index, parentIndex int) error { + attr := netlink.NewLinkAttrs() + attr.Name = name + attr.Index = index + attr.ParentIndex = parentIndex + + iface := netlink.GenericLink{ + LinkAttrs: attr, + LinkType: "vlan", + } + return m.netlinkShim.LinkAdd(&iface) +} + func (m *mockDataplane) createBondSlaves(name string, index, masterIndex int) error { attr := netlink.NewLinkAttrs() attr.Name = name @@ -487,25 +500,6 @@ var _ = 
Describe("BPF Endpoint Manager", func() { Expect(name).To(Equal(vv.IfName())) } - getPolicyIdx := func(idx int, name, hook string) int { - k := ifstate.NewKey(uint32(idx)) - vb, err := ifStateMap.Get(k.AsBytes()) - if err != nil { - Fail(fmt.Sprintf("Ifstate does not have key %s", k), 1) - } - vv := ifstate.ValueFromBytes(vb) - Expect(name).To(Equal(vv.IfName())) - switch hook { - case "ingress": - return vv.IngressPolicyV4() - case "egress": - return vv.EgressPolicyV4() - case "xdp": - return vv.XDPPolicyV4() - } - return -1 - } - genIfaceUpdate := func(name string, state ifacemonitor.State, index int) func() { return func() { bpfEpMgr.OnUpdate(&ifaceStateUpdate{Name: name, State: state, Index: index}) @@ -654,13 +648,281 @@ var _ = Describe("BPF Endpoint Manager", func() { Context("with workload and host-* endpoints", func() { JustBeforeEach(func() { + dp.interfaceByIndexFn = func(ifindex int) (*net.Interface, error) { + if ifindex == 10 { + return &net.Interface{ + Name: "bond0", + Index: 10, + Flags: net.FlagUp, + }, nil + } + if ifindex == 11 { + return &net.Interface{ + Name: "bond0.100", + Index: 11, + Flags: net.FlagUp, + }, nil + } + if ifindex == 20 { + return &net.Interface{ + Name: "eth10", + Index: 20, + Flags: net.FlagUp, + }, nil + } + if ifindex == 30 { + return &net.Interface{ + Name: "eth20", + Index: 30, + Flags: net.FlagUp, + }, nil + } + return nil, errors.New("no such network interface") + } genPolicy("default", "mypolicy")() - genIfaceUpdate("eth0", ifacemonitor.StateUp, 10)() + genIfaceUpdate("eth0", ifacemonitor.StateUp, 3)() genWLUpdate("cali12345")() genIfaceUpdate("cali12345", ifacemonitor.StateUp, 15)() genHEPUpdate(allInterfaces, hostEpNorm)() }) + It("should attach/detach programs when ifaces are added/deleted", func() { + dataIfacePattern = "^eth|bond*" + newBpfEpMgr(false) + genUntracked("default", "untracked1")() + newHEP := hostEp + newHEP.UntrackedTiers = []*proto.TierInfo{{ + Name: "default", + IngressPolicies: 
[]string{"untracked1"}, + }} + err := dp.createIface("bond0", 10, "bond") + Expect(err).NotTo(HaveOccurred()) + err = dp.createBondSlaves("eth10", 20, 10) + Expect(err).NotTo(HaveOccurred()) + err = dp.createBondSlaves("eth20", 30, 10) + Expect(err).NotTo(HaveOccurred()) + genHEPUpdate("bond0", newHEP)() + genIfaceUpdate("bond0", ifacemonitor.StateUp, 10)() + Expect(len(bpfEpMgr.hostIfaces)).To(Equal(1)) + Expect(dp.programAttached("bond0:ingress")).To(BeTrue()) + Expect(dp.programAttached("bond0:egress")).To(BeTrue()) + Expect(dp.programAttached("bond0:xdp")).To(BeTrue()) + + genIfaceUpdate("eth10", ifacemonitor.StateUp, 20)() + Expect(dp.programAttached("eth10:ingress")).To(BeFalse()) + Expect(dp.programAttached("eth10:egress")).To(BeFalse()) + Expect(dp.programAttached("eth10:xdp")).To(BeTrue()) + + genIfaceUpdate("eth20", ifacemonitor.StateUp, 30)() + Expect(dp.programAttached("eth20:ingress")).To(BeFalse()) + Expect(dp.programAttached("eth20:egress")).To(BeFalse()) + Expect(dp.programAttached("eth20:xdp")).To(BeTrue()) + Expect(dp.programAttached("bond0:ingress")).To(BeTrue()) + Expect(dp.programAttached("bond0:egress")).To(BeTrue()) + Expect(dp.programAttached("bond0:xdp")).To(BeFalse()) + + err = dp.createVlanIface("bond0.100", 11, 10) + Expect(err).NotTo(HaveOccurred()) + genHEPUpdate("bond0.100", newHEP)() + genIfaceUpdate("bond0.100", ifacemonitor.StateUp, 11)() + Expect(dp.programAttached("bond0.100:ingress")).To(BeTrue()) + Expect(dp.programAttached("bond0.100:egress")).To(BeTrue()) + Expect(dp.programAttached("bond0.100:xdp")).To(BeFalse()) + Expect(dp.programAttached("bond0:ingress")).To(BeFalse()) + Expect(dp.programAttached("bond0:egress")).To(BeFalse()) + Expect(dp.programAttached("bond0:xdp")).To(BeFalse()) + Expect(dp.programAttached("eth20:ingress")).To(BeFalse()) + Expect(dp.programAttached("eth20:egress")).To(BeFalse()) + Expect(dp.programAttached("eth20:xdp")).To(BeTrue()) + Expect(dp.programAttached("eth10:ingress")).To(BeFalse()) + 
Expect(dp.programAttached("eth10:egress")).To(BeFalse()) + Expect(dp.programAttached("eth10:xdp")).To(BeTrue()) + + genIfaceUpdate("bond0.100", ifacemonitor.StateNotPresent, 11)() + Expect(dp.programAttached("bond0:ingress")).To(BeTrue()) + Expect(dp.programAttached("bond0:egress")).To(BeTrue()) + Expect(dp.programAttached("bond0:xdp")).To(BeFalse()) + Expect(dp.programAttached("eth10:ingress")).To(BeFalse()) + Expect(dp.programAttached("eth10:egress")).To(BeFalse()) + Expect(dp.programAttached("eth10:xdp")).To(BeFalse()) + Expect(dp.programAttached("eth20:ingress")).To(BeFalse()) + Expect(dp.programAttached("eth20:egress")).To(BeFalse()) + Expect(dp.programAttached("eth20:xdp")).To(BeFalse()) + + // Delete bond Iface + genIfaceUpdate("bond0", ifacemonitor.StateNotPresent, 10)() + Expect(dp.programAttached("eth10:ingress")).To(BeTrue()) + Expect(dp.programAttached("eth10:egress")).To(BeTrue()) + Expect(dp.programAttached("eth10:xdp")).To(BeFalse()) + Expect(dp.programAttached("eth20:ingress")).To(BeTrue()) + Expect(dp.programAttached("eth20:egress")).To(BeTrue()) + Expect(dp.programAttached("eth20:xdp")).To(BeFalse()) + + }) + + It("should add host ifaces to iface tree", func() { + dataIfacePattern = "^eth|bond*" + newBpfEpMgr(false) + err := dp.createIface("bond0", 10, "bond") + Expect(err).NotTo(HaveOccurred()) + err = dp.createBondSlaves("eth10", 20, 10) + Expect(err).NotTo(HaveOccurred()) + err = dp.createBondSlaves("eth20", 30, 10) + Expect(err).NotTo(HaveOccurred()) + genIfaceUpdate("bond0", ifacemonitor.StateUp, 10)() + genIfaceUpdate("eth10", ifacemonitor.StateUp, 20)() + genIfaceUpdate("eth20", ifacemonitor.StateUp, 30)() + + bondIface := bpfEpMgr.findHostIfaceByIndex(10) + eth10Iface := bpfEpMgr.findHostIfaceByIndex(20) + eth20Iface := bpfEpMgr.findHostIfaceByIndex(30) + + /* Check if bond0, eth10, eth20 exist in the tree and + * at the right position. 
+ */ + Expect(len(bpfEpMgr.hostIfaces)).To(Equal(1)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(10)) + Expect(bondIface).NotTo(BeNil()) + Expect(eth10Iface).NotTo(BeNil()) + Expect(eth20Iface).NotTo(BeNil()) + + Expect(isRootIface(bondIface)).To(BeTrue()) + Expect(isRootIface(eth10Iface)).To(BeFalse()) + Expect(isRootIface(eth20Iface)).To(BeFalse()) + Expect(isLeafIface(eth10Iface)).To(BeTrue()) + Expect(isLeafIface(eth20Iface)).To(BeTrue()) + Expect(isLeafIface(bondIface)).To(BeFalse()) + + // Check if bond slave information is correct. + leaves := bpfEpMgr.getBondSlaves("bond0") + Expect(leaves.Len()).To(Equal(2)) + Expect(leaves.Slice()).To(ConsistOf("eth10", "eth20")) + + // Create bond vlan interface. + err = dp.createVlanIface("bond0.100", 11, 10) + Expect(err).NotTo(HaveOccurred()) + genIfaceUpdate("bond0.100", ifacemonitor.StateUp, 11)() + Expect(len(bpfEpMgr.hostIfaces)).To(Equal(1)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(11)) + + // Check if bond vlan interface is the root and bond + // is neither a root nor a leaf. + bondVlanIface := bpfEpMgr.findHostIfaceByIndex(11) + Expect(bondVlanIface).NotTo(BeNil()) + Expect(isRootIface(bondVlanIface)).To(BeTrue()) + Expect(isRootIface(bondIface)).To(BeFalse()) + Expect(isLeafIface(bondIface)).To(BeFalse()) + + // Validate the tree. + val := bpfEpMgr.hostIfaces[11] + Expect(len(val.children)).To(Equal(1)) + Expect(val.children).To(HaveKey(10)) + + val = val.children[10] + Expect(len(val.children)).To(Equal(2)) + Expect(val.children).To(HaveKey(20)) + Expect(val.children).To(HaveKey(30)) + + leaves = bpfEpMgr.getBondSlaves("bond0.100") + Expect(leaves.Len()).To(Equal(2)) + Expect(leaves.Slice()).To(ConsistOf("eth10", "eth20")) + leaves = bpfEpMgr.getBondSlaves("eth10") + Expect(leaves.Len()).To(Equal(0)) + leaves = bpfEpMgr.getBondSlaves("eth0") + Expect(leaves.Len()).To(Equal(0)) + + // Add a new iface eth0. 
+ genIfaceUpdate("eth0", ifacemonitor.StateUp, 3)() + Expect(len(bpfEpMgr.hostIfaces)).To(Equal(2)) + eth0Iface := bpfEpMgr.findHostIfaceByIndex(3) + Expect(eth0Iface).NotTo(BeNil()) + Expect(isRootIface(eth0Iface)).To(BeTrue()) + Expect(isLeafIface(eth0Iface)).To(BeTrue()) + + // Delete vlan interface. + genIfaceUpdate("bond0.100", ifacemonitor.StateNotPresent, 11)() + Expect(len(bpfEpMgr.hostIfaces)).To(Equal(2)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(10)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(3)) + bondVlanIface = bpfEpMgr.findHostIfaceByIndex(11) + Expect(bondVlanIface).To(BeNil()) + bondIface = bpfEpMgr.findHostIfaceByIndex(10) + eth10Iface = bpfEpMgr.findHostIfaceByIndex(20) + eth20Iface = bpfEpMgr.findHostIfaceByIndex(30) + Expect(bondIface).NotTo(BeNil()) + Expect(eth10Iface).NotTo(BeNil()) + Expect(eth20Iface).NotTo(BeNil()) + Expect(isRootIface(bondIface)).To(BeTrue()) + Expect(isLeafIface(bondIface)).To(BeFalse()) + Expect(isLeafIface(eth10Iface)).To(BeTrue()) + Expect(isLeafIface(eth20Iface)).To(BeTrue()) + + leaves = bpfEpMgr.getBondSlaves("bond0") + Expect(leaves.Len()).To(Equal(2)) + Expect(leaves.Slice()).To(ConsistOf("eth10", "eth20")) + + // Delete bond interface. + genIfaceUpdate("bond0", ifacemonitor.StateNotPresent, 10)() + Expect(len(bpfEpMgr.hostIfaces)).To(Equal(3)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(3)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(20)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(30)) + bondIface = bpfEpMgr.findHostIfaceByIndex(10) + eth10Iface = bpfEpMgr.findHostIfaceByIndex(20) + eth20Iface = bpfEpMgr.findHostIfaceByIndex(30) + Expect(bondIface).To(BeNil()) + Expect(eth10Iface).NotTo(BeNil()) + Expect(eth20Iface).NotTo(BeNil()) + Expect(isLeafIface(eth10Iface)).To(BeTrue()) + Expect(isLeafIface(eth20Iface)).To(BeTrue()) + Expect(isRootIface(eth10Iface)).To(BeTrue()) + Expect(isRootIface(eth20Iface)).To(BeTrue()) + + // Add the interfaces again. 
+ genIfaceUpdate("bond0", ifacemonitor.StateUp, 10)() + genIfaceUpdate("eth10", ifacemonitor.StateUp, 20)() + genIfaceUpdate("eth20", ifacemonitor.StateUp, 30)() + Expect(len(bpfEpMgr.hostIfaces)).To(Equal(2)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(3)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(10)) + + genIfaceUpdate("bond0.100", ifacemonitor.StateUp, 11)() + Expect(len(bpfEpMgr.hostIfaces)).To(Equal(2)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(3)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(11)) + bondIface = bpfEpMgr.findHostIfaceByIndex(10) + bondVlanIface = bpfEpMgr.findHostIfaceByIndex(11) + eth10Iface = bpfEpMgr.findHostIfaceByIndex(20) + eth20Iface = bpfEpMgr.findHostIfaceByIndex(30) + Expect(bondVlanIface).NotTo(BeNil()) + Expect(bondIface).NotTo(BeNil()) + Expect(eth10Iface).NotTo(BeNil()) + Expect(eth20Iface).NotTo(BeNil()) + Expect(isRootIface(bondVlanIface)).To(BeTrue()) + Expect(isLeafIface(bondVlanIface)).To(BeFalse()) + Expect(isRootIface(bondIface)).To(BeFalse()) + Expect(isLeafIface(bondIface)).To(BeFalse()) + Expect(isRootIface(eth10Iface)).To(BeFalse()) + Expect(isRootIface(eth20Iface)).To(BeFalse()) + Expect(isLeafIface(eth10Iface)).To(BeTrue()) + Expect(isLeafIface(eth20Iface)).To(BeTrue()) + + // Delete the bond, which is neither root nor leaf. 
+ genIfaceUpdate("bond0", ifacemonitor.StateNotPresent, 10)() + Expect(len(bpfEpMgr.hostIfaces)).To(Equal(3)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(3)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(20)) + Expect(bpfEpMgr.hostIfaces).To(HaveKey(30)) + eth10Iface = bpfEpMgr.findHostIfaceByIndex(20) + eth20Iface = bpfEpMgr.findHostIfaceByIndex(30) + Expect(eth10Iface).NotTo(BeNil()) + Expect(eth20Iface).NotTo(BeNil()) + Expect(isRootIface(eth10Iface)).To(BeTrue()) + Expect(isRootIface(eth20Iface)).To(BeTrue()) + Expect(isLeafIface(eth10Iface)).To(BeTrue()) + Expect(isLeafIface(eth20Iface)).To(BeTrue()) + }) + It("does not have host-* policy on the workload interface", func() { var eth0I, eth0E, eth0X, caliI, caliE *polprog.Rules @@ -782,6 +1044,69 @@ var _ = Describe("BPF Endpoint Manager", func() { }) }) + Context("with bond iface and vlan", func() { + JustBeforeEach(func() { + dp.interfaceByIndexFn = func(ifindex int) (*net.Interface, error) { + if ifindex == 10 { + return &net.Interface{ + Name: "bond0", + Index: 10, + Flags: net.FlagUp, + }, nil + } + if ifindex == 11 { + return &net.Interface{ + Name: "bond0.100", + Index: 11, + Flags: net.FlagUp, + }, nil + } + if ifindex == 20 { + return &net.Interface{ + Name: "eth10", + Index: 20, + Flags: net.FlagUp, + }, nil + } + if ifindex == 30 { + return &net.Interface{ + Name: "eth20", + Index: 30, + Flags: net.FlagUp, + }, nil + } + return nil, errors.New("no such network interface") + } + err := dp.createIface("bond0", 10, "bond") + Expect(err).NotTo(HaveOccurred()) + err = dp.createVlanIface("bond0.100", 11, 10) + Expect(err).NotTo(HaveOccurred()) + err = dp.createBondSlaves("eth10", 20, 10) + Expect(err).NotTo(HaveOccurred()) + err = dp.createBondSlaves("eth20", 30, 10) + Expect(err).NotTo(HaveOccurred()) + dataIfacePattern = "^eth|bond*" + newBpfEpMgr(false) + err = bpfEpMgr.CompleteDeferredWork() + Expect(err).NotTo(HaveOccurred()) + }) + + Context("should attach to bond interface", func() { + It("should attach tc 
to bond", func() { + genIfaceUpdate("bond0", ifacemonitor.StateUp, 10)() + genIfaceUpdate("eth10", ifacemonitor.StateUp, 20)() + genIfaceUpdate("eth20", ifacemonitor.StateUp, 30)() + Expect(dp.programAttached("bond0:ingress")).To(BeTrue()) + Expect(dp.programAttached("bond0:egress")).To(BeTrue()) + Expect(dp.programAttached("bond0:xdp")).To(BeFalse()) + Expect(dp.programAttached("eth10:ingress")).To(BeFalse()) + Expect(dp.programAttached("eth10:egress")).To(BeFalse()) + Expect(dp.programAttached("eth20:ingress")).To(BeFalse()) + Expect(dp.programAttached("eth20:egress")).To(BeFalse()) + }) + }) + }) + Context("with bond iface", func() { JustBeforeEach(func() { dp.interfaceByIndexFn = func(ifindex int) (*net.Interface, error) { @@ -828,8 +1153,7 @@ var _ = Describe("BPF Endpoint Manager", func() { Expect(dp.programAttached("bond0:ingress")).To(BeTrue()) Expect(dp.programAttached("bond0:egress")).To(BeTrue()) checkIfState(10, "bond0", ifstate.FlgIPv4Ready|ifstate.FlgBond) - xdpID := getPolicyIdx(10, "bond0", "xdp") - Expect(xdpID).To(Equal(-1)) + Expect(dp.programAttached("bond0:xdp")).To(BeFalse()) }) It("should not attach to bond slaves", func() { Expect(dp.programAttached("eth10:ingress")).To(BeFalse()) diff --git a/felix/fv/bpf_test.go b/felix/fv/bpf_test.go index 31f3f65db8b..584bb47d19a 100644 --- a/felix/fv/bpf_test.go +++ b/felix/fv/bpf_test.go @@ -5672,12 +5672,12 @@ func checkServiceRoute(felix *infrastructure.Felix, ip string) bool { return false } -func bpfCheckIfPolicyProgrammed(felix *infrastructure.Felix, iface, hook, polName, action string, isWorkload bool) bool { +func checkIfPolicyProgrammed(felix *infrastructure.Felix, iface, hook, polName, action string, isWorkload bool, ipFamily proto.IPVersion) bool { startStr := fmt.Sprintf("Start of policy %s", polName) endStr := fmt.Sprintf("End of policy %s", polName) actionStr := fmt.Sprintf("Start of rule action:\"%s\"", action) var policyDbg bpf.PolicyDebugInfo - out, err := felix.ExecOutput("cat", 
bpf.PolicyDebugJSONFileName(iface, hook, proto.IPVersion_IPV4)) + out, err := felix.ExecOutput("cat", bpf.PolicyDebugJSONFileName(iface, hook, ipFamily)) if err != nil { return false } @@ -5721,6 +5721,14 @@ func bpfCheckIfPolicyProgrammed(felix *infrastructure.Felix, iface, hook, polNam return (startOfPolicy && endOfPolicy && actionMatch) } +func bpfCheckIfPolicyProgrammed(felix *infrastructure.Felix, iface, hook, polName, action string, isWorkload bool) bool { + return checkIfPolicyProgrammed(felix, iface, hook, polName, action, isWorkload, proto.IPVersion_IPV4) +} + +func bpfCheckIfPolicyProgrammedV6(felix *infrastructure.Felix, iface, hook, polName, action string, isWorkload bool) bool { + return checkIfPolicyProgrammed(felix, iface, hook, polName, action, isWorkload, proto.IPVersion_IPV6) +} + func bpfDumpPolicy(felix *infrastructure.Felix, iface, hook string) string { var ( out string diff --git a/felix/fv/donottrack_test.go b/felix/fv/donottrack_test.go index 80e3305f767..99fab9b21af 100644 --- a/felix/fv/donottrack_test.go +++ b/felix/fv/donottrack_test.go @@ -379,6 +379,17 @@ var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ do-not-track policy tests; ensureRightIFStateFlags(felix, ifstate.FlgIPv4Ready|ifstate.FlgIPv6Ready, ifstate.FlgBondSlave, map[string]uint32{"bond0": ifstate.FlgIPv4Ready | ifstate.FlgIPv6Ready | ifstate.FlgBond}) createHostEndpoint(felix, "bond0", []string{felix.IP, felix.IPv6}, client, ctx) } + + for _, felix := range tc.Felixes { + Eventually(func() bool { + return bpfCheckIfPolicyProgrammed(felix, "bond0", "egress", "default.allow-egress", "allow", false) + }, "5s", "200ms").Should(BeTrue()) + + Eventually(func() bool { + return bpfCheckIfPolicyProgrammedV6(felix, "bond0", "egress", "default.allow-egress", "allow", false) + }, "5s", "200ms").Should(BeTrue()) + + } }) AfterEach(func() { diff --git a/felix/netlinkshim/mocknetlink/netlink.go b/felix/netlinkshim/mocknetlink/netlink.go index 9914799b2e3..8af2807284f 100644 --- 
a/felix/netlinkshim/mocknetlink/netlink.go +++ b/felix/netlinkshim/mocknetlink/netlink.go @@ -520,7 +520,9 @@ func (d *MockNetlinkDataplane) LinkAdd(link netlink.Link) error { return AlreadyExistsError } attrs := *link.Attrs() - attrs.Index = 100 + d.NumLinkAddCalls + if attrs.Index == 0 { + attrs.Index = 100 + d.NumLinkAddCalls + } d.NameToLink[link.Attrs().Name] = &MockLink{ LinkAttrs: attrs, LinkType: link.Type(),