From 811f653793b7d8fcc2b3bc71842456ad6c09bc20 Mon Sep 17 00:00:00 2001 From: Kim Sondrup Date: Mon, 19 Aug 2024 18:45:54 +0200 Subject: [PATCH] feat: endpoint DomainFilter convert Punycode to Unicode --- endpoint/domain_filter.go | 21 ++++- endpoint/domain_filter_test.go | 135 ++++++++++++++++++++++++++++++++- 2 files changed, 150 insertions(+), 6 deletions(-) diff --git a/endpoint/domain_filter.go b/endpoint/domain_filter.go index 3acfbcd939..2e9ce6967e 100644 --- a/endpoint/domain_filter.go +++ b/endpoint/domain_filter.go @@ -23,6 +23,9 @@ import ( "regexp" "sort" "strings" + + log "github.com/sirupsen/logrus" + "golang.org/x/net/idna" ) type MatchAllDomainFilters []DomainFilterInterface @@ -69,7 +72,7 @@ type domainFilterSerde struct { func prepareFilters(filters []string) []string { var fs []string for _, filter := range filters { - if domain := strings.ToLower(strings.TrimSuffix(strings.TrimSpace(filter), ".")); domain != "" { + if domain := normalizeDomain(strings.TrimSpace(filter)); domain != "" { fs = append(fs, domain) } } @@ -109,7 +112,7 @@ func matchFilter(filters []string, domain string, emptyval bool) bool { return emptyval } - strippedDomain := strings.ToLower(strings.TrimSuffix(domain, ".")) + strippedDomain := normalizeDomain(domain) for _, filter := range filters { if filter == "" { continue @@ -133,7 +136,7 @@ func matchFilter(filters []string, domain string, emptyval bool) bool { // only regex regular expression matches the domain // Otherwise, if either negativeRegex matches or regex does not match the domain, it returns false func matchRegex(regex *regexp.Regexp, negativeRegex *regexp.Regexp, domain string) bool { - strippedDomain := strings.ToLower(strings.TrimSuffix(domain, ".")) + strippedDomain := normalizeDomain(domain) if negativeRegex != nil && negativeRegex.String() != "" { return !negativeRegex.MatchString(strippedDomain) @@ -214,7 +217,7 @@ func (df DomainFilter) MatchParent(domain string) bool { return true } - strippedDomain := 
strings.ToLower(strings.TrimSuffix(domain, ".")) + strippedDomain := normalizeDomain(domain) for _, filter := range df.Filters { if filter == "" || strings.HasPrefix(filter, ".") { // We don't check parents if the filter is prefixed with "." @@ -226,3 +229,13 @@ func (df DomainFilter) MatchParent(domain string) bool { } return false } + +// normalizeDomain converts a domain to a canonical form, so that we can filter on it +// It trims the "." suffix and returns the Unicode version of the domain, compliant with Section 5 of RFC 5891 +func normalizeDomain(domain string) string { + s, err := idna.Lookup.ToUnicode(strings.TrimSuffix(domain, ".")) + if err != nil { + log.Warnf(`Got error while parsing domain %s: %v`, domain, err) + } + return s +} diff --git a/endpoint/domain_filter_test.go b/endpoint/domain_filter_test.go index 58f0e99d9b..92108bbc19 100644 --- a/endpoint/domain_filter_test.go +++ b/endpoint/domain_filter_test.go @@ -247,6 +247,26 @@ var domainFilterTests = []domainFilterTest{ "exclude": {".api.example.org"}, }, }, + { + []string{"æøå.org"}, + []string{"api.æøå.org"}, + []string{"foo.api.æøå.org", "api.æøå.org"}, + false, + map[string][]string{ + "include": {"æøå.org"}, + "exclude": {"api.æøå.org"}, + }, + }, + { + []string{" æøå.org. 
"}, + []string{" .api.æøå.org "}, + []string{"foo.api.æøå.org", "bar.baz.api.æøå.org."}, + false, + map[string][]string{ + "include": {"æøå.org"}, + "exclude": {".api.æøå.org"}, + }, + }, { []string{"example.org."}, []string{"api.example.org"}, @@ -297,6 +317,16 @@ var domainFilterTests = []domainFilterTest{ "exclude": {"foo-bar.example.org"}, }, }, + { + []string{"sTOnks📈.ORG", "API.xn--StonkS-u354e.ORG"}, + []string{"Foo-Bar.stoNks📈.Org"}, + []string{"FoOoo.Api.Stonks📈.Org"}, + true, + map[string][]string{ + "include": {"api.stonks📈.org", "stonks📈.org"}, + "exclude": {"foo-bar.stonks📈.org"}, + }, + }, { []string{"eXaMPle.ORG", "API.example.ORG"}, []string{"api.example.org"}, @@ -347,6 +377,25 @@ var regexDomainFilterTests = []regexDomainFilterTest{ "regexInclude": "(?:foo|bar)\\.org$", }, }, + { + regexp.MustCompile("(?:😍|🤩)\\.org$"), + regexp.MustCompile(""), + []string{"😍.org", "xn--r28h.org", "🤩.org", "example.😍.org", "example.🤩.org", "a.example.xn--r28h.org", "a.example.🤩.org"}, + true, + map[string]string{ + "regexInclude": "(?:😍|🤩)\\.org$", + }, + }, + { + regexp.MustCompile("(?:😍|🤩)\\.org$"), + regexp.MustCompile("^example\\.(?:😍|🤩)\\.org$"), + []string{"example.😍.org", "example.🤩.org"}, + false, + map[string]string{ + "regexInclude": "(?:😍|🤩)\\.org$", + "regexExclude": "^example\\.(?:😍|🤩)\\.org$", + }, + }, { regexp.MustCompile("(?:foo|bar)\\.org$"), regexp.MustCompile("^example\\.(?:foo|bar)\\.org$"), @@ -479,8 +528,8 @@ func TestPrepareFiltersStripsWhitespaceAndDotSuffix(t *testing.T) { nil, }, { - []string{" foo ", " bar. ", "baz."}, - []string{"foo", "bar", "baz"}, + []string{" foo ", " bar. ", "baz.", "xn--bar-zna"}, + []string{"foo", "bar", "baz", "øbar"}, }, { []string{"foo.bar", " foo.bar. ", " foo.bar.baz ", " foo.bar.baz. 
"}, @@ -714,6 +763,24 @@ func TestDomainFilterMatchParent(t *testing.T) { "include": {"a.example.com", "b.example.com"}, }, }, + { + []string{"a.xn--c1yn36f.æøå.", "b.點看.xn--5cab8c", "c.點看.æøå"}, + []string{}, + []string{"xn--c1yn36f.xn--5cab8c"}, + true, + map[string][]string{ + "include": {"a.點看.æøå", "b.點看.æøå", "c.點看.æøå"}, + }, + }, + { + []string{"punycode.xn--c1yn36f.local", "å.點看.local.", "ø.點看.local"}, + []string{}, + []string{"點看.local"}, + true, + map[string][]string{ + "include": {"punycode.點看.local", "å.點看.local", "ø.點看.local"}, + }, + }, { []string{"a.example.com"}, []string{}, @@ -769,3 +836,67 @@ func TestDomainFilterMatchParent(t *testing.T) { }) } } + +func TestDomainFilterNormalizeDomain(t *testing.T) { + records := []struct { + dnsName string + expect string + }{ + { + "3AAAA.FOO.BAR.COM", + "3aaaa.foo.bar.com", + }, + { + "example.foo.com.", + "example.foo.com", + }, + { + "example123.foo.com", + "example123.foo.com", + }, + { + "foo.com.", + "foo.com", + }, + { + "foo123.COM", + "foo123.com", + }, + { + "my-exaMple3.FOO.BAR.COM", + "my-example3.foo.bar.com", + }, + { + "my-example1214.FOO-1235.BAR-foo.COM", + "my-example1214.foo-1235.bar-foo.com", + }, + { + "my-example-my-example-1214.FOO-1235.BAR-foo.COM", + "my-example-my-example-1214.foo-1235.bar-foo.com", + }, + { + "xn--c1yn36f.org.", + "點看.org", + }, + { + "xn--nordic--w1a.xn--xn--kItty-pd34d-hn01b3542b.com", + "nordic-ø.xn--kitty-點看pd34d.com", + }, + { + "xn--nordic--w1a.xn--kItty-pd34d.com", + "nordic-ø.kitty😸.com", + }, + { + "nordic-ø.kitty😸.COM", + "nordic-ø.kitty😸.com", + }, + { + "xn--nordic--w1a.kiTTy😸.com.", + "nordic-ø.kitty😸.com", + }, + } + for _, r := range records { + gotName := normalizeDomain(r.dnsName) + assert.Equal(t, r.expect, gotName) + } +}