diff --git a/build/charts/antrea/conf/antrea-agent.conf b/build/charts/antrea/conf/antrea-agent.conf index 6bbe0824a23..f56413c67f0 100644 --- a/build/charts/antrea/conf/antrea-agent.conf +++ b/build/charts/antrea/conf/antrea-agent.conf @@ -306,6 +306,11 @@ kubeAPIServerOverride: {{ .Values.kubeAPIServerOverride | quote }} # 10.96.0.10:53, [fd00:10:96::a]:53). dnsServerOverride: {{ .Values.dnsServerOverride | quote }} +# The fqdnCacheMinTTL setting helps address the problem of applications caching DNS response IPs beyond the TTL value for the DNS record. +# It is used to enforce FQDN policy rules, ensuring that resolved IPs are included in datapath rules for as long as the application is caching them. +# This value should ideally be set to the maximum caching duration across all applications. +fqdnCacheMinTTL: {{ .Values.fqdnCacheMinTTL }} + # Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used. # https://golang.org/pkg/crypto/tls/#pkg-constants # Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always diff --git a/build/charts/antrea/values.yaml b/build/charts/antrea/values.yaml index a4cf6a6db83..884c91ecc35 100644 --- a/build/charts/antrea/values.yaml +++ b/build/charts/antrea/values.yaml @@ -186,6 +186,10 @@ kubeAPIServerOverride: "" # -- Address of DNS server, to override the kube-dns Service. It's used to # resolve hostnames in a FQDN policy. dnsServerOverride: "" +# -- The fqdnCacheMinTTL setting helps address the problem of applications caching DNS response IPs beyond the TTL value for the DNS record. +# Cluster administrators should configure this value, ideally setting it to be equal to or greater than the maximum TTL +# value of the application's DNS cache. +fqdnCacheMinTTL: 0 # -- IPv4 CIDR range used for Services. Required when AntreaProxy is disabled. serviceCIDR: "" # -- IPv6 CIDR range used for Services. Required when AntreaProxy is disabled. 
diff --git a/build/yamls/antrea-aks.yml b/build/yamls/antrea-aks.yml index e30c8c27f9b..d5103c64de7 100644 --- a/build/yamls/antrea-aks.yml +++ b/build/yamls/antrea-aks.yml @@ -4234,6 +4234,11 @@ data: # 10.96.0.10:53, [fd00:10:96::a]:53). dnsServerOverride: "" + # The fqdnCacheMinTTL setting helps address the problem of applications caching DNS response IPs beyond the TTL value for the DNS record. + # It is used to enforce FQDN policy rules, ensuring that resolved IPs are included in datapath rules for as long as the application is caching them. + # This value should ideally be set to the maximum caching duration across all applications. + fqdnCacheMinTTL: 0 + # Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used. # https://golang.org/pkg/crypto/tls/#pkg-constants # Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always @@ -5383,7 +5388,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: e2d1d8af083c88667ac4c22c87dea63e595b2f4f770190c32afb00c480440fe3 + checksum/config: e110c8521f19cd9446a44a03bd30688f9dc9925a22e4663865a00ae57eb45321 labels: app: antrea component: antrea-agent @@ -5621,7 +5626,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: e2d1d8af083c88667ac4c22c87dea63e595b2f4f770190c32afb00c480440fe3 + checksum/config: e110c8521f19cd9446a44a03bd30688f9dc9925a22e4663865a00ae57eb45321 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea-eks.yml b/build/yamls/antrea-eks.yml index 32e4cd34bb7..2b79c995dbd 100644 --- a/build/yamls/antrea-eks.yml +++ b/build/yamls/antrea-eks.yml @@ -4234,6 +4234,11 @@ data: # 10.96.0.10:53, 
[fd00:10:96::a]:53). dnsServerOverride: "" + # The fqdnCacheMinTTL setting helps address the problem of applications caching DNS response IPs beyond the TTL value for the DNS record. + # It is used to enforce FQDN policy rules, ensuring that resolved IPs are included in datapath rules for as long as the application is caching them. + # This value should ideally be set to the maximum caching duration across all applications. + fqdnCacheMinTTL: 0 + # Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used. # https://golang.org/pkg/crypto/tls/#pkg-constants # Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always @@ -5383,7 +5388,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: e2d1d8af083c88667ac4c22c87dea63e595b2f4f770190c32afb00c480440fe3 + checksum/config: e110c8521f19cd9446a44a03bd30688f9dc9925a22e4663865a00ae57eb45321 labels: app: antrea component: antrea-agent @@ -5622,7 +5627,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: e2d1d8af083c88667ac4c22c87dea63e595b2f4f770190c32afb00c480440fe3 + checksum/config: e110c8521f19cd9446a44a03bd30688f9dc9925a22e4663865a00ae57eb45321 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea-gke.yml b/build/yamls/antrea-gke.yml index a1733ffd279..df838cd5bfa 100644 --- a/build/yamls/antrea-gke.yml +++ b/build/yamls/antrea-gke.yml @@ -4234,6 +4234,11 @@ data: # 10.96.0.10:53, [fd00:10:96::a]:53). dnsServerOverride: "" + # The fqdnCacheMinTTL setting helps address the problem of applications caching DNS response IPs beyond the TTL value for the DNS record. 
+ # It is used to enforce FQDN policy rules, ensuring that resolved IPs are included in datapath rules for as long as the application is caching them. + # This value should ideally be set to the maximum caching duration across all applications. + fqdnCacheMinTTL: 0 + # Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used. # https://golang.org/pkg/crypto/tls/#pkg-constants # Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always @@ -5383,7 +5388,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 7e42a403d388e2ed556d9b41f4af83917eadd0863d4e2bef67353f5adb2ef6c3 + checksum/config: 46818c48f6a155057238465531d114fac58b552fa27d643e04e6272e45a22671 labels: app: antrea component: antrea-agent @@ -5619,7 +5624,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 7e42a403d388e2ed556d9b41f4af83917eadd0863d4e2bef67353f5adb2ef6c3 + checksum/config: 46818c48f6a155057238465531d114fac58b552fa27d643e04e6272e45a22671 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea-ipsec.yml b/build/yamls/antrea-ipsec.yml index 92d831c2263..65b4af1c152 100644 --- a/build/yamls/antrea-ipsec.yml +++ b/build/yamls/antrea-ipsec.yml @@ -4247,6 +4247,11 @@ data: # 10.96.0.10:53, [fd00:10:96::a]:53). dnsServerOverride: "" + # The fqdnCacheMinTTL setting helps address the problem of applications caching DNS response IPs beyond the TTL value for the DNS record. + # It is used to enforce FQDN policy rules, ensuring that resolved IPs are included in datapath rules for as long as the application is caching them. 
+ # This value should ideally be set to the maximum caching duration across all applications. + fqdnCacheMinTTL: 0 + # Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used. # https://golang.org/pkg/crypto/tls/#pkg-constants # Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always @@ -5396,7 +5401,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 7d8b0a065c3db85e34e127fdf38b820b32712657900e3f8fe2703d4310c40632 + checksum/config: 833415719521cc942d7007330bd1a2a87b200055e35851cdbf4851e68ae41605 checksum/ipsec-secret: d0eb9c52d0cd4311b6d252a951126bf9bea27ec05590bed8a394f0f792dcb2a4 labels: app: antrea @@ -5678,7 +5683,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 7d8b0a065c3db85e34e127fdf38b820b32712657900e3f8fe2703d4310c40632 + checksum/config: 833415719521cc942d7007330bd1a2a87b200055e35851cdbf4851e68ae41605 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea.yml b/build/yamls/antrea.yml index deea6342fdf..8c7438d835d 100644 --- a/build/yamls/antrea.yml +++ b/build/yamls/antrea.yml @@ -4234,6 +4234,11 @@ data: # 10.96.0.10:53, [fd00:10:96::a]:53). dnsServerOverride: "" + # The fqdnCacheMinTTL setting helps address the problem of applications caching DNS response IPs beyond the TTL value for the DNS record. + # It is used to enforce FQDN policy rules, ensuring that resolved IPs are included in datapath rules for as long as the application is caching them. + # This value should ideally be set to the maximum caching duration across all applications. + fqdnCacheMinTTL: 0 + # Comma-separated list of Cipher Suites. 
If omitted, the default Go Cipher Suites will be used. # https://golang.org/pkg/crypto/tls/#pkg-constants # Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always @@ -5383,7 +5388,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 2b4d82bcb825d50926115bad2125097f85aed424bfc49147444314cad8b7826a + checksum/config: 6fd8efc7007f902f7d846353d9386224d8abadd1d8694d2e5702d8328804c330 labels: app: antrea component: antrea-agent @@ -5619,7 +5624,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 2b4d82bcb825d50926115bad2125097f85aed424bfc49147444314cad8b7826a + checksum/config: 6fd8efc7007f902f7d846353d9386224d8abadd1d8694d2e5702d8328804c330 labels: app: antrea component: antrea-controller diff --git a/cmd/antrea-agent/agent.go b/cmd/antrea-agent/agent.go index 9b0ab2db775..c53649a6482 100644 --- a/cmd/antrea-agent/agent.go +++ b/cmd/antrea-agent/agent.go @@ -528,6 +528,7 @@ func run(o *Options) error { nodeConfig, podNetworkWait, l7Reconciler, + uint32(o.config.FqdnCacheMinTTL), ) if err != nil { return fmt.Errorf("error creating new NetworkPolicy controller: %v", err) diff --git a/cmd/antrea-agent/options.go b/cmd/antrea-agent/options.go index a091fdd7068..1d16a2c88d1 100644 --- a/cmd/antrea-agent/options.go +++ b/cmd/antrea-agent/options.go @@ -155,6 +155,11 @@ func (o *Options) validate(args []string) error { return fmt.Errorf("nodeType %s requires feature gate ExternalNode to be enabled", o.config.NodeType) } + // validate FqdnCacheMinTTL + if o.config.FqdnCacheMinTTL < 0 { + return fmt.Errorf("fqdnCacheMinTTL must be greater than or equal to 0") + } + if o.config.NodeType == config.ExternalNode.String() { 
o.nodeType = config.ExternalNode return o.validateExternalNodeOptions() diff --git a/pkg/agent/controller/networkpolicy/fqdn.go b/pkg/agent/controller/networkpolicy/fqdn.go index 8c077c707fe..8bccb9706cf 100644 --- a/pkg/agent/controller/networkpolicy/fqdn.go +++ b/pkg/agent/controller/networkpolicy/fqdn.go @@ -127,6 +127,7 @@ type fqdnController struct { ofClient openflow.Client // dnsServerAddr stores the coreDNS server address, or the user provided DNS server address. dnsServerAddr string + minTTL uint32 // dirtyRuleHandler is a callback that is run upon finding a rule out-of-sync. dirtyRuleHandler func(string) @@ -160,7 +161,7 @@ type fqdnController struct { clock clock.Clock } -func newFQDNController(client openflow.Client, allocator *idAllocator, dnsServerOverride string, dirtyRuleHandler func(string), v4Enabled, v6Enabled bool, gwPort uint32, clock clock.WithTicker) (*fqdnController, error) { +func newFQDNController(client openflow.Client, allocator *idAllocator, dnsServerOverride string, dirtyRuleHandler func(string), v4Enabled, v6Enabled bool, gwPort uint32, clock clock.WithTicker, fqdnCacheMinTTL uint32) (*fqdnController, error) { controller := &fqdnController{ ofClient: client, dirtyRuleHandler: dirtyRuleHandler, @@ -182,6 +183,7 @@ func newFQDNController(client openflow.Client, allocator *idAllocator, dnsServer ipv6Enabled: v6Enabled, gwPort: gwPort, clock: clock, + minTTL: fqdnCacheMinTTL, } if controller.ofClient != nil { if err := controller.ofClient.NewDNSPacketInConjunction(dnsInterceptRuleID); err != nil { @@ -643,7 +645,7 @@ func (f *fqdnController) parseDNSResponse(msg *dns.Msg) (string, map[string]ipWi if f.ipv4Enabled { responseIPs[r.A.String()] = ipWithExpiration{ ip: r.A, - expirationTime: currentTime.Add(time.Duration(r.Header().Ttl) * time.Second), + expirationTime: currentTime.Add(time.Duration(max(f.minTTL, r.Header().Ttl)) * time.Second), } } @@ -651,7 +653,7 @@ func (f *fqdnController) parseDNSResponse(msg *dns.Msg) (string, 
map[string]ipWi if f.ipv6Enabled { responseIPs[r.AAAA.String()] = ipWithExpiration{ ip: r.AAAA, - expirationTime: currentTime.Add(time.Duration(r.Header().Ttl) * time.Second), + expirationTime: currentTime.Add(time.Duration(max(f.minTTL, r.Header().Ttl)) * time.Second), } } } diff --git a/pkg/agent/controller/networkpolicy/fqdn_test.go b/pkg/agent/controller/networkpolicy/fqdn_test.go index 55e6a21a058..eb87518080a 100644 --- a/pkg/agent/controller/networkpolicy/fqdn_test.go +++ b/pkg/agent/controller/networkpolicy/fqdn_test.go @@ -21,6 +21,7 @@ import ( "testing" "time" + "github.com/miekg/dns" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/mock/gomock" @@ -32,7 +33,8 @@ import ( openflowtest "antrea.io/antrea/pkg/agent/openflow/testing" ) -func newMockFQDNController(t *testing.T, controller *gomock.Controller, dnsServer *string, clockToInject clock.WithTicker) (*fqdnController, *openflowtest.MockClient) { +func newMockFQDNController(t *testing.T, controller *gomock.Controller, dnsServer *string, + clockToInject clock.WithTicker, fqdnCacheMinTTL uint32) (*fqdnController, *openflowtest.MockClient) { mockOFClient := openflowtest.NewMockClient(controller) mockOFClient.EXPECT().NewDNSPacketInConjunction(gomock.Any()).Return(nil).AnyTimes() dirtyRuleHandler := func(rule string) {} @@ -52,6 +54,7 @@ func newMockFQDNController(t *testing.T, controller *gomock.Controller, dnsServe false, config.DefaultHostGatewayOFPort, clockToInject, + fqdnCacheMinTTL, ) require.NoError(t, err) return f, mockOFClient @@ -170,7 +173,7 @@ func TestAddFQDNRule(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { controller := gomock.NewController(t) - f, c := newMockFQDNController(t, controller, nil, nil) + f, c := newMockFQDNController(t, controller, nil, nil, 0) if tt.addressAdded { c.EXPECT().AddAddressToDNSConjunction(dnsInterceptRuleID, gomock.Any()).Times(1) } @@ -331,7 +334,7 @@ func TestDeleteFQDNRule(t *testing.T) { 
for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { controller := gomock.NewController(t) - f, c := newMockFQDNController(t, controller, nil, nil) + f, c := newMockFQDNController(t, controller, nil, nil, 0) c.EXPECT().AddAddressToDNSConjunction(dnsInterceptRuleID, gomock.Any()).Times(len(tt.previouslyAddedRules)) f.dnsEntryCache = tt.existingDNSCache if tt.addressRemoved { @@ -350,7 +353,7 @@ func TestDeleteFQDNRule(t *testing.T) { func TestLookupIPFallback(t *testing.T) { controller := gomock.NewController(t) dnsServer := "" // force a fallback to local resolver - f, _ := newMockFQDNController(t, controller, &dnsServer, nil) + f, _ := newMockFQDNController(t, controller, &dnsServer, nil, 0) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() // not ideal as a unit test because it requires the ability to resolve @@ -427,7 +430,7 @@ func TestGetIPsForFQDNSelectors(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { controller := gomock.NewController(t) - f, _ := newMockFQDNController(t, controller, nil, nil) + f, _ := newMockFQDNController(t, controller, nil, nil, 0) if tc.existingSelectorItemToFQDN != nil { f.selectorItemToFQDN = tc.existingSelectorItemToFQDN } @@ -545,7 +548,7 @@ func TestSyncDirtyRules(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { controller := gomock.NewController(t) - f, _ := newMockFQDNController(t, controller, nil, nil) + f, _ := newMockFQDNController(t, controller, nil, nil, 0) var dirtyRuleSyncCalls []string f.dirtyRuleHandler = func(s string) { dirtyRuleSyncCalls = append(dirtyRuleSyncCalls, s) @@ -713,7 +716,7 @@ func TestOnDNSResponse(t *testing.T) { t.Run(tc.name, func(t *testing.T) { fakeClock := newFakeClock(currentTime) controller := gomock.NewController(t) - f, _ := newMockFQDNController(t, controller, nil, fakeClock) + f, _ := newMockFQDNController(t, controller, nil, fakeClock, 0) f.dnsEntryCache = tc.existingDNSCache if 
tc.mockSelectorToRuleIDs != nil { f.selectorItemToRuleIDs = tc.mockSelectorToRuleIDs @@ -744,3 +747,111 @@ func TestOnDNSResponse(t *testing.T) { }) } } +func TestFQDNCacheMinTTL(t *testing.T) { + currentTime := time.Now() + testIP := "192.168.1.1" + testFQDN := "fqdn-test-pod.lfx.test" + tests := []struct { + name string + expectedTTL time.Time + fqdnCacheMinTTL uint32 + dnsMsg *dns.Msg + }{ + { + name: "Response TTL less than FQDNCacheTTL", + expectedTTL: currentTime.Add(10 * time.Second), + fqdnCacheMinTTL: 10, + dnsMsg: &dns.Msg{ + Question: []dns.Question{ + {Name: testFQDN, Qtype: dns.TypeA, Qclass: dns.ClassINET}, + }, + Answer: []dns.RR{ + &dns.A{ + Hdr: dns.RR_Header{ + Name: testFQDN, + Rrtype: dns.TypeA, + Class: dns.ClassINET, + Ttl: 5, + }, + A: net.ParseIP(testIP), + }, + }, + }, + }, + { + name: "Response TTL more than FQDNCacheTTL", + expectedTTL: currentTime.Add(10 * time.Second), + fqdnCacheMinTTL: 5, + dnsMsg: &dns.Msg{ + Question: []dns.Question{ + {Name: testFQDN, Qtype: dns.TypeA, Qclass: dns.ClassINET}, + }, + Answer: []dns.RR{ + &dns.A{ + Hdr: dns.RR_Header{ + Name: testFQDN, + Rrtype: dns.TypeA, + Class: dns.ClassINET, + Ttl: 10, + }, + A: net.ParseIP(testIP), + }, + }, + }, + }, + { + name: "Response TTL equal to FQDNCacheTTL", + expectedTTL: currentTime.Add(5 * time.Second), + fqdnCacheMinTTL: 5, + dnsMsg: &dns.Msg{ + Question: []dns.Question{ + {Name: testFQDN, Qtype: dns.TypeA, Qclass: dns.ClassINET}, + }, + Answer: []dns.RR{ + &dns.A{ + Hdr: dns.RR_Header{ + Name: testFQDN, + Rrtype: dns.TypeA, + Class: dns.ClassINET, + Ttl: 5, + }, + A: net.ParseIP(testIP), + }, + }, + }, + }, + { + name: "FQDNCacheTTL is not set", + expectedTTL: currentTime.Add(5 * time.Second), + fqdnCacheMinTTL: 0, + dnsMsg: &dns.Msg{ + Question: []dns.Question{ + {Name: testFQDN, Qtype: dns.TypeA, Qclass: dns.ClassINET}, + }, + Answer: []dns.RR{ + &dns.A{ + Hdr: dns.RR_Header{ + Name: testFQDN, + Rrtype: dns.TypeA, + Class: dns.ClassINET, + Ttl: 5, + }, + A: 
net.ParseIP(testIP), + }, + }, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + fakeClock := newFakeClock(currentTime) + controller := gomock.NewController(t) + f, _ := newMockFQDNController(t, controller, nil, fakeClock, tc.fqdnCacheMinTTL) + require.Zero(t, fakeClock.TimersAdded()) + _, responseIPs, err := f.parseDNSResponse(tc.dnsMsg) + assert.NoError(t, err) + assert.Equal(t, tc.expectedTTL, responseIPs[testIP].expirationTime) + }) + } +} diff --git a/pkg/agent/controller/networkpolicy/networkpolicy_controller.go b/pkg/agent/controller/networkpolicy/networkpolicy_controller.go index 9b5308e549f..949496d7f29 100644 --- a/pkg/agent/controller/networkpolicy/networkpolicy_controller.go +++ b/pkg/agent/controller/networkpolicy/networkpolicy_controller.go @@ -196,7 +196,7 @@ func NewNetworkPolicyController(antreaClientGetter client.AntreaClientProvider, gwPort, tunPort uint32, nodeConfig *config.NodeConfig, podNetworkWait *utilwait.Group, - l7Reconciler *l7engine.Reconciler) (*Controller, error) { + l7Reconciler *l7engine.Reconciler, fqdnCacheMinTTL uint32) (*Controller, error) { idAllocator := newIDAllocator(asyncRuleDeleteInterval, dnsInterceptRuleID) c := &Controller{ antreaClientProvider: antreaClientGetter, @@ -227,7 +227,7 @@ func NewNetworkPolicyController(antreaClientGetter client.AntreaClientProvider, var err error if antreaPolicyEnabled { - if c.fqdnController, err = newFQDNController(ofClient, idAllocator, dnsServerOverride, c.enqueueRule, v4Enabled, v6Enabled, gwPort, clock.RealClock{}); err != nil { + if c.fqdnController, err = newFQDNController(ofClient, idAllocator, dnsServerOverride, c.enqueueRule, v4Enabled, v6Enabled, gwPort, clock.RealClock{}, fqdnCacheMinTTL); err != nil { return nil, err } diff --git a/pkg/agent/controller/networkpolicy/networkpolicy_controller_test.go b/pkg/agent/controller/networkpolicy/networkpolicy_controller_test.go index 50b2fc3a16c..64eb5c97972 100644 --- 
a/pkg/agent/controller/networkpolicy/networkpolicy_controller_test.go +++ b/pkg/agent/controller/networkpolicy/networkpolicy_controller_test.go @@ -105,7 +105,8 @@ func newTestController() (*Controller, *fake.Clientset, *mockReconciler) { config.DefaultTunOFPort, &config.NodeConfig{}, wait.NewGroup(), - l7reconciler) + l7reconciler, + 0) reconciler := newMockReconciler() controller.podReconciler = reconciler controller.auditLogger = nil diff --git a/pkg/agent/controller/networkpolicy/pod_reconciler_test.go b/pkg/agent/controller/networkpolicy/pod_reconciler_test.go index 390be056682..da0052a3b04 100644 --- a/pkg/agent/controller/networkpolicy/pod_reconciler_test.go +++ b/pkg/agent/controller/networkpolicy/pod_reconciler_test.go @@ -108,7 +108,7 @@ func newCIDR(cidrStr string) *net.IPNet { } func newTestReconciler(t *testing.T, controller *gomock.Controller, ifaceStore interfacestore.InterfaceStore, ofClient *openflowtest.MockClient, v4Enabled, v6Enabled bool) *podReconciler { - f, _ := newMockFQDNController(t, controller, nil, nil) + f, _ := newMockFQDNController(t, controller, nil, nil, 0) ch := make(chan string, 100) groupIDAllocator := openflow.NewGroupAllocator() groupCounters := []proxytypes.GroupCounter{proxytypes.NewGroupCounter(groupIDAllocator, ch)} diff --git a/pkg/config/agent/config.go b/pkg/config/agent/config.go index 79fdf48b644..3fe8cde396d 100644 --- a/pkg/config/agent/config.go +++ b/pkg/config/agent/config.go @@ -155,6 +155,10 @@ type AgentConfig struct { // Defaults to "". It must be a host string or a host:port pair of the DNS server (e.g. 10.96.0.10, // 10.96.0.10:53, [fd00:10:96::a]:53). DNSServerOverride string `yaml:"dnsServerOverride,omitempty"` + // The minTTL setting helps address the problem of applications caching DNS response IPs indefinitely. + // The Cluster administrators should configure this value, ideally setting it to be equal to or greater than the maximum TTL + // value of the application's DNS cache. 
+ FqdnCacheMinTTL int `yaml:"fqdnCacheMinTTL,omitempty"` // Cipher suites to use. TLSCipherSuites string `yaml:"tlsCipherSuites,omitempty"` // TLS min version. diff --git a/test/e2e/antreapolicy_test.go b/test/e2e/antreapolicy_test.go index 7f7c44829bb..842137cd9c6 100644 --- a/test/e2e/antreapolicy_test.go +++ b/test/e2e/antreapolicy_test.go @@ -5270,8 +5270,12 @@ func testAntreaClusterNetworkPolicyStats(t *testing.T, data *TestData) { k8sUtils.Cleanup(namespaces) } -// TestFQDNCacheMinTTL tests stable FQDN access for applications with cached DNS resolutions -// when FQDN NetworkPolicy are in use and the FQDN-to-IP resolution changes frequently. +// TestFQDNCacheMinTTL ensures stable FQDN access for applications that cache DNS resolutions, +// even when FQDN-to-IP mappings change frequently, and FQDN-based NetworkPolicies are in use. +// It validates the functionality of the new minTTL configuration, which is used for scenarios +// where applications may cache DNS responses beyond the TTL defined in the original DNS response. +// The minTTL value enforces that resolved IPs remain in datapath rules for as long as +// applications might cache them, thereby preventing intermittent network connectivity issues to the FQDN concerned. func TestFQDNCacheMinTTL(t *testing.T) { const ( testFQDN = "fqdn-test-pod.lfx.test" @@ -5368,14 +5372,13 @@ func TestFQDNCacheMinTTL(t *testing.T) { require.NoError(t, data.setPodAnnotation(data.testNamespace, "custom-dns-server", "test.antrea.io/random-value", randSeq(8)), "failed to update custom DNS Pod annotation.") - // finally verify that Curling the previously cached IP fails after DNS update. + // finally verify that Curling the previously cached IP does not fail after DNS update. // The wait time here should be slightly longer than the reload value specified in the custom DNS configuration. - // TODO: This assertion currently verifies the issue described in https://github.com/antrea-io/antrea/issues/6229. 
- // It will need to be updated once minTTL support is implemented. + // This assertion verifies the fix for the issue described in https://github.com/antrea-io/antrea/issues/6229. t.Logf("Trying to curl the existing cached IP of the domain: %s", fqdnIP) assert.EventuallyWithT(t, func(t *assert.CollectT) { _, err := curlFQDN(fqdnIP) - assert.Error(t, err) + assert.NoError(t, err) }, 10*time.Second, 1*time.Second) }