From 4d28391c240c78febb2c26ce4d7bd084e1af6f5e Mon Sep 17 00:00:00 2001 From: Hristo Venev Date: Mon, 26 Oct 2020 01:40:05 -0700 Subject: [PATCH 1/3] Fix double counting of IP addresses The range allocator in pkg/controller/nodeipam/ipam/range_allocator.go may call Occupy() on the same range twice: 1. Just before subscribing to the NodeInformer 2. From a callback given to the NodeInformer soon after registration --- .../nodeipam/ipam/cidrset/cidr_set.go | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pkg/controller/nodeipam/ipam/cidrset/cidr_set.go b/pkg/controller/nodeipam/ipam/cidrset/cidr_set.go index 359f830e3a6bb..91a946c9eea4b 100644 --- a/pkg/controller/nodeipam/ipam/cidrset/cidr_set.go +++ b/pkg/controller/nodeipam/ipam/cidrset/cidr_set.go @@ -232,16 +232,20 @@ func (s *CidrSet) Release(cidr *net.IPNet) error { s.Lock() defer s.Unlock() for i := begin; i <= end; i++ { - s.used.SetBit(&s.used, i, 0) - s.allocatedCIDRs-- - cidrSetReleases.WithLabelValues(s.label).Inc() + // Only change the counters if we change the bit to prevent + // double counting. + if s.used.Bit(i) != 0 { + s.used.SetBit(&s.used, i, 0) + s.allocatedCIDRs-- + cidrSetReleases.WithLabelValues(s.label).Inc() + } } cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs)) return nil } -// Occupy marks the given CIDR range as used. Occupy does not check if the CIDR +// Occupy marks the given CIDR range as used. Occupy succeeds even if the CIDR // range was previously used. func (s *CidrSet) Occupy(cidr *net.IPNet) (err error) { begin, end, err := s.getBeginingAndEndIndices(cidr) @@ -251,9 +255,13 @@ func (s *CidrSet) Occupy(cidr *net.IPNet) (err error) { s.Lock() defer s.Unlock() for i := begin; i <= end; i++ { - s.used.SetBit(&s.used, i, 1) - s.allocatedCIDRs++ - cidrSetAllocations.WithLabelValues(s.label).Inc() + // Only change the counters if we change the bit to prevent + // double counting. + if s.used.Bit(i) == 0 { + s.used.SetBit(&s.used, i, 1) + s.allocatedCIDRs++ + cidrSetAllocations.WithLabelValues(s.label).Inc() + } } cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs)) From ee581278bd652951838378dc75b1d347357939cc Mon Sep 17 00:00:00 2001 From: Hristo Venev Date: Sun, 15 Nov 2020 23:30:58 -0800 Subject: [PATCH 2/3] cidrset: Add test for double counting --- .../nodeipam/ipam/cidrset/cidr_set_test.go | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/pkg/controller/nodeipam/ipam/cidrset/cidr_set_test.go b/pkg/controller/nodeipam/ipam/cidrset/cidr_set_test.go index ead684ea15910..52bb209d7d6ec 100644 --- a/pkg/controller/nodeipam/ipam/cidrset/cidr_set_test.go +++ b/pkg/controller/nodeipam/ipam/cidrset/cidr_set_test.go @@ -316,6 +316,8 @@ func TestCIDRSet_AllocationOccupied(t *testing.T) { for i := numCIDRs / 2; i < numCIDRs; i++ { a.Occupy(cidrs[i]) } + // occupy the first of the last 128 again + a.Occupy(cidrs[numCIDRs/2]) // allocate the first 128 CIDRs again var rcidrs []*net.IPNet @@ -341,6 +343,98 @@ func TestCIDRSet_AllocationOccupied(t *testing.T) { } } +func TestDoubleOccupyRelease(t *testing.T) { + // Run a sequence of operations and check the number of occupied CIDRs + // after each one. + clusterCIDRStr := "10.42.0.0/16" + operations := []struct { + cidrStr string + operation string + numOccupied int + }{ + // Occupy 1 element: +1 + { + cidrStr: "10.42.5.0/24", + operation: "occupy", + numOccupied: 1, + }, + // Occupy 1 more element: +1 + { + cidrStr: "10.42.9.0/24", + operation: "occupy", + numOccupied: 2, + }, + // Occupy 4 elements overlapping with one from the above: +3 + { + cidrStr: "10.42.8.0/22", + operation: "occupy", + numOccupied: 5, + }, + // Occupy an already-coccupied element: no change + { + cidrStr: "10.42.9.0/24", + operation: "occupy", + numOccupied: 5, + }, + // Release an coccupied element: -1 + { + cidrStr: "10.42.9.0/24", + operation: "release", + numOccupied: 4, + }, + // Release an unoccupied element: no change + { + cidrStr: "10.42.9.0/24", + operation: "release", + numOccupied: 4, + }, + // Release 4 elements, only one of which is occupied: -1 + { + cidrStr: "10.42.4.0/22", + operation: "release", + numOccupied: 3, + }, + } + // Check that there are exactly that many allocatable CIDRs after all + // operations have been executed. + numAllocatable24s := (1 << 8) - 3 + + _, clusterCIDR, _ := net.ParseCIDR(clusterCIDRStr) + a, err := NewCIDRSet(clusterCIDR, 24) + if err != nil { + t.Fatalf("Error allocating CIDRSet") + } + + // Execute the operations + for _, op := range operations { + _, cidr, _ := net.ParseCIDR(op.cidrStr) + switch op.operation { + case "occupy": + a.Occupy(cidr) + case "release": + a.Release(cidr) + default: + t.Fatalf("test error: unknown operation %v", op.operation) + } + if a.allocatedCIDRs != op.numOccupied { + t.Fatalf("Expected %d occupied CIDRS, got %d", op.numOccupied, a.allocatedCIDRs) + } + } + + // Make sure that we can allocate exactly `numAllocatable24s` elements. + for i := 0; i < numAllocatable24s; i++ { + _, err := a.AllocateNext() + if err != nil { + t.Fatalf("Expected to be able to allocate %d CIDRS, failed after %d", numAllocatable24s, i) + } + } + + _, err = a.AllocateNext() + if err == nil { + t.Fatalf("Expected to be able to allocate exactly %d CIDRS, got one more", numAllocatable24s) + } +} + func TestGetBitforCIDR(t *testing.T) { cases := []struct { clusterCIDRStr string From c8c81be8af90db3e0aba9d4e5eb5638cbe0e2757 Mon Sep 17 00:00:00 2001 From: Hristo Venev Date: Sun, 15 Nov 2020 23:37:43 -0800 Subject: [PATCH 3/3] range_allocator: Test (lack of) double counting --- .../nodeipam/ipam/range_allocator_test.go | 63 +++++++++++++++++-- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/pkg/controller/nodeipam/ipam/range_allocator_test.go b/pkg/controller/nodeipam/ipam/range_allocator_test.go index 92cf51af2d4d5..e72b7707dca89 100644 --- a/pkg/controller/nodeipam/ipam/range_allocator_test.go +++ b/pkg/controller/nodeipam/ipam/range_allocator_test.go @@ -486,12 +486,55 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { NodeCIDRMaskSizes: []int{24, 98, 24}, }, }, + { + description: "no double counting", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.0/24"}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.2.0/24"}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ClusterCIDRs: func() []*net.IPNet { + _, clusterCIDR, _ := net.ParseCIDR("10.10.0.0/22") + return []*net.IPNet{clusterCIDR} + }(), + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + NodeCIDRMaskSizes: []int{24}, + }, + expectedAllocatedCIDR: map[int]string{ + 0: "10.10.1.0/24", + }, + }, } // test function testFunc := func(tc testCase) { + fakeNodeInformer := getFakeNodeInformer(tc.fakeNodeHandler) + nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) // Initialize the range allocator. - allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, getFakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) + allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList) if err != nil { t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) return @@ -517,13 +560,23 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { t.Fatalf("%v: unexpected error when occupying CIDR %v: %v", tc.description, allocated, err) } } - if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil { - t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) + } + + updateCount := 0 + for _, node := range tc.fakeNodeHandler.Existing { + if node.Spec.PodCIDRs == nil { + updateCount++ } - if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { - t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + if err := allocator.AllocateOrOccupyCIDR(node); err != nil { + t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) } } + if updateCount != 1 { + t.Fatalf("test error: all tests must update exactly one node") + } + if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, updateCount, wait.ForeverTestTimeout); err != nil { + t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + } if len(tc.expectedAllocatedCIDR) == 0 { // nothing further expected