From 4d8abac66316a3e549ff12c2cb37d47a9738522e Mon Sep 17 00:00:00 2001 From: jzonthemtn Date: Sun, 25 Aug 2024 09:33:00 -0400 Subject: [PATCH] 131 Adding option to email filter for just email addresses with valid TLDs. --- .../phileas/services/FilterPolicyLoader.java | 5 +- .../filters/regex/EmailAddressFilter.java | 27 +- .../main/resources/tlds-alpha-by-domain.txt | 1298 +++++++++++++++++ .../filters/EmailAddressFilterTest.java | 137 +- .../test/resources/tlds-alpha-by-domain.txt | 1298 +++++++++++++++++ .../model/policy/filters/EmailAddress.java | 24 + 6 files changed, 2783 insertions(+), 6 deletions(-) create mode 100644 phileas-core/src/main/resources/tlds-alpha-by-domain.txt create mode 100644 phileas-core/src/test/resources/tlds-alpha-by-domain.txt diff --git a/phileas-core/src/main/java/ai/philterd/phileas/services/FilterPolicyLoader.java b/phileas-core/src/main/java/ai/philterd/phileas/services/FilterPolicyLoader.java index 2e69576d7..1d5372d08 100644 --- a/phileas-core/src/main/java/ai/philterd/phileas/services/FilterPolicyLoader.java +++ b/phileas-core/src/main/java/ai/philterd/phileas/services/FilterPolicyLoader.java @@ -293,7 +293,10 @@ public List getFiltersForPolicy(final Policy policy, final Map tlds = null; + + public EmailAddressFilter(FilterConfiguration filterConfiguration, boolean onlyStrictMatches, boolean onlyValidTLDs) throws IOException { super(FilterType.EMAIL_ADDRESS, filterConfiguration); - final Pattern emailAddressPattern = Pattern.compile("(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])", Pattern.CASE_INSENSITIVE); + final Pattern emailAddressPattern = onlyStrictMatches + ? Pattern.compile("\\b(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\b])", Pattern.CASE_INSENSITIVE) + : Pattern.compile("\\b[\\w.-]+?@(?:([a-zA-Z\\d\\-])+?\\.)+(?:[a-zA-Z\\d]{2,4})+\\b"); + final FilterPattern email1 = new FilterPattern.FilterPatternBuilder(emailAddressPattern, 0.90).build(); this.contextualTerms = new HashSet<>(); @@ -42,6 +54,13 @@ public EmailAddressFilter(FilterConfiguration filterConfiguration) { this.contextualTerms.add("e-mail"); this.analyzer = new Analyzer(contextualTerms, email1); + this.onlyValidTLDs = onlyValidTLDs; + + if(onlyValidTLDs) { + final File file = new File(getClass().getClassLoader().getResource("tlds-alpha-by-domain.txt").getFile()); + final List rawTlds = Files.readAllLines(file.toPath(), Charset.defaultCharset()); + this.tlds = rawTlds.stream().filter(str->!str.startsWith("#")).map(String::toLowerCase).map(s -> "." + s).toList(); + } } @@ -50,6 +69,10 @@ public FilterResult filter(Policy policy, String context, String documentId, int final List spans = findSpans(policy, analyzer, input, context, documentId, attributes); + if(onlyValidTLDs) { + spans.removeIf(str -> tlds.stream().noneMatch(str.getText()::endsWith)); + } + return new FilterResult(context, documentId, spans); } diff --git a/phileas-core/src/main/resources/tlds-alpha-by-domain.txt b/phileas-core/src/main/resources/tlds-alpha-by-domain.txt new file mode 100644 index 000000000..fddc07b80 --- /dev/null +++ b/phileas-core/src/main/resources/tlds-alpha-by-domain.txt @@ -0,0 +1,1298 @@ +# From https://data.iana.org/TLD/tlds-alpha-by-domain.txt +# Version 2024082500, Last Updated Sun Aug 25 07:07:01 2024 UTC +AAA +AARP +ABB +ABBOTT +ABBVIE +ABC +ABLE +ABOGADO +ABUDHABI +AC +ACADEMY +ACCENTURE +ACCOUNTANT +ACCOUNTANTS +ACO +ACTOR +AD +ADS +ADULT +AE +AEG +AERO +AETNA +AF +AFL +AFRICA +AG +AGAKHAN +AGENCY +AI +AIG +AIRBUS +AIRFORCE +AIRTEL +AKDN +AL +ALIBABA +ALIPAY +ALLFINANZ +ALLSTATE +ALLY +ALSACE +ALSTOM +AM +AMAZON +AMERICANEXPRESS +AMERICANFAMILY +AMEX +AMFAM +AMICA +AMSTERDAM +ANALYTICS +ANDROID +ANQUAN +ANZ +AO +AOL +APARTMENTS +APP +APPLE +AQ +AQUARELLE +AR +ARAB +ARAMCO +ARCHI +ARMY +ARPA +ART +ARTE +AS +ASDA +ASIA +ASSOCIATES +AT +ATHLETA +ATTORNEY +AU +AUCTION +AUDI +AUDIBLE +AUDIO +AUSPOST +AUTHOR +AUTO +AUTOS +AW +AWS +AX +AXA +AZ +AZURE +BA +BABY +BAIDU +BANAMEX +BAND +BANK +BAR +BARCELONA +BARCLAYCARD +BARCLAYS +BAREFOOT +BARGAINS +BASEBALL +BASKETBALL +BAUHAUS +BAYERN +BB +BBC +BBT +BBVA +BCG +BCN +BD +BE +BEATS +BEAUTY +BEER +BENTLEY +BERLIN +BEST +BESTBUY +BET +BF +BG +BH +BHARTI +BI +BIBLE +BID +BIKE +BING +BINGO +BIO +BIZ +BJ +BLACK +BLACKFRIDAY +BLOCKBUSTER +BLOG +BLOOMBERG +BLUE +BM +BMS +BMW +BN +BNPPARIBAS +BO +BOATS +BOEHRINGER +BOFA +BOM +BOND +BOO +BOOK +BOOKING +BOSCH +BOSTIK +BOSTON +BOT +BOUTIQUE +BOX +BR +BRADESCO +BRIDGESTONE +BROADWAY +BROKER +BROTHER +BRUSSELS +BS +BT +BUILD +BUILDERS +BUSINESS +BUY +BUZZ +BV +BW +BY +BZ +BZH +CA +CAB +CAFE +CAL +CALL +CALVINKLEIN +CAM +CAMERA +CAMP +CANON +CAPETOWN +CAPITAL +CAPITALONE +CAR +CARAVAN +CARDS +CARE +CAREER +CAREERS +CARS +CASA +CASE +CASH +CASINO +CAT +CATERING +CATHOLIC +CBA +CBN +CBRE +CC +CD +CENTER +CEO +CERN +CF +CFA +CFD +CG +CH +CHANEL +CHANNEL +CHARITY +CHASE +CHAT +CHEAP +CHINTAI +CHRISTMAS +CHROME +CHURCH +CI +CIPRIANI +CIRCLE +CISCO +CITADEL +CITI +CITIC +CITY +CK +CL +CLAIMS +CLEANING +CLICK +CLINIC +CLINIQUE +CLOTHING +CLOUD +CLUB +CLUBMED +CM +CN +CO +COACH +CODES +COFFEE +COLLEGE +COLOGNE +COM +COMMBANK +COMMUNITY +COMPANY +COMPARE +COMPUTER +COMSEC +CONDOS +CONSTRUCTION +CONSULTING +CONTACT +CONTRACTORS +COOKING +COOL +COOP +CORSICA +COUNTRY +COUPON +COUPONS +COURSES +CPA +CR +CREDIT +CREDITCARD +CREDITUNION +CRICKET +CROWN +CRS +CRUISE +CRUISES +CU +CUISINELLA +CV +CW +CX +CY +CYMRU +CYOU +CZ +DABUR +DAD +DANCE +DATA +DATE +DATING +DATSUN +DAY +DCLK +DDS +DE +DEAL +DEALER +DEALS +DEGREE +DELIVERY +DELL +DELOITTE +DELTA +DEMOCRAT +DENTAL +DENTIST +DESI +DESIGN +DEV +DHL +DIAMONDS +DIET +DIGITAL +DIRECT +DIRECTORY +DISCOUNT +DISCOVER +DISH +DIY +DJ +DK +DM +DNP +DO +DOCS +DOCTOR +DOG +DOMAINS +DOT +DOWNLOAD +DRIVE +DTV +DUBAI +DUNLOP +DUPONT +DURBAN +DVAG +DVR +DZ +EARTH +EAT +EC +ECO +EDEKA +EDU +EDUCATION +EE +EG +EMAIL +EMERCK +ENERGY +ENGINEER +ENGINEERING +ENTERPRISES +EPSON +EQUIPMENT +ER +ERICSSON +ERNI +ES +ESQ +ESTATE +ET +EU +EUROVISION +EUS +EVENTS +EXCHANGE +EXPERT +EXPOSED +EXPRESS +EXTRASPACE +FAGE +FAIL +FAIRWINDS +FAITH +FAMILY +FAN +FANS +FARM +FARMERS +FASHION +FAST +FEDEX +FEEDBACK +FERRARI +FERRERO +FI +FIDELITY +FIDO +FILM +FINAL +FINANCE +FINANCIAL +FIRE +FIRESTONE +FIRMDALE +FISH +FISHING +FIT +FITNESS +FJ +FK +FLICKR +FLIGHTS +FLIR +FLORIST +FLOWERS +FLY +FM +FO +FOO +FOOD +FOOTBALL +FORD +FOREX +FORSALE +FORUM +FOUNDATION +FOX +FR +FREE +FRESENIUS +FRL +FROGANS +FRONTIER +FTR +FUJITSU +FUN +FUND +FURNITURE +FUTBOL +FYI +GA +GAL +GALLERY +GALLO +GALLUP +GAME +GAMES +GAP +GARDEN +GAY +GB +GBIZ +GD +GDN +GE +GEA +GENT +GENTING +GEORGE +GF +GG +GGEE +GH +GI +GIFT +GIFTS +GIVES +GIVING +GL +GLASS +GLE +GLOBAL +GLOBO +GM +GMAIL +GMBH +GMO +GMX +GN +GODADDY +GOLD +GOLDPOINT +GOLF +GOO +GOODYEAR +GOOG +GOOGLE +GOP +GOT +GOV +GP +GQ +GR +GRAINGER +GRAPHICS +GRATIS +GREEN +GRIPE +GROCERY +GROUP +GS +GT +GU +GUCCI +GUGE +GUIDE +GUITARS +GURU +GW +GY +HAIR +HAMBURG +HANGOUT +HAUS +HBO +HDFC +HDFCBANK +HEALTH +HEALTHCARE +HELP +HELSINKI +HERE +HERMES +HIPHOP +HISAMITSU +HITACHI +HIV +HK +HKT +HM +HN +HOCKEY +HOLDINGS +HOLIDAY +HOMEDEPOT +HOMEGOODS +HOMES +HOMESENSE +HONDA +HORSE +HOSPITAL +HOST +HOSTING +HOT +HOTELS +HOTMAIL +HOUSE +HOW +HR +HSBC +HT +HU +HUGHES +HYATT +HYUNDAI +IBM +ICBC +ICE +ICU +ID +IE +IEEE +IFM +IKANO +IL +IM +IMAMAT +IMDB +IMMO +IMMOBILIEN +IN +INC +INDUSTRIES +INFINITI +INFO +ING +INK +INSTITUTE +INSURANCE +INSURE +INT +INTERNATIONAL +INTUIT +INVESTMENTS +IO +IPIRANGA +IQ +IR +IRISH +IS +ISMAILI +IST +ISTANBUL +IT +ITAU +ITV +JAGUAR +JAVA +JCB +JE +JEEP +JETZT +JEWELRY +JIO +JLL +JM +JMP +JNJ +JO +JOBS +JOBURG +JOT +JOY +JP +JPMORGAN +JPRS +JUEGOS +JUNIPER +KAUFEN +KDDI +KE +KERRYHOTELS +KERRYLOGISTICS +KERRYPROPERTIES +KFH +KG +KH +KI +KIA +KIDS +KIM +KINDLE +KITCHEN +KIWI +KM +KN +KOELN +KOMATSU +KOSHER +KP +KPMG +KPN +KR +KRD +KRED +KUOKGROUP +KW +KY +KYOTO +KZ +LA +LACAIXA +LAMBORGHINI +LAMER +LANCASTER +LAND +LANDROVER +LANXESS +LASALLE +LAT +LATINO +LATROBE +LAW +LAWYER +LB +LC +LDS +LEASE +LECLERC +LEFRAK +LEGAL +LEGO +LEXUS +LGBT +LI +LIDL +LIFE +LIFEINSURANCE +LIFESTYLE +LIGHTING +LIKE +LILLY +LIMITED +LIMO +LINCOLN +LINK +LIPSY +LIVE +LIVING +LK +LLC +LLP +LOAN +LOANS +LOCKER +LOCUS +LOL +LONDON +LOTTE +LOTTO +LOVE +LPL +LPLFINANCIAL +LR +LS +LT +LTD +LTDA +LU +LUNDBECK +LUXE +LUXURY +LV +LY +MA +MADRID +MAIF +MAISON +MAKEUP +MAN +MANAGEMENT +MANGO +MAP +MARKET +MARKETING +MARKETS +MARRIOTT +MARSHALLS +MATTEL +MBA +MC +MCKINSEY +MD +ME +MED +MEDIA +MEET +MELBOURNE +MEME +MEMORIAL +MEN +MENU +MERCKMSD +MG +MH +MIAMI +MICROSOFT +MIL +MINI +MINT +MIT +MITSUBISHI +MK +ML +MLB +MLS +MM +MMA +MN +MO +MOBI +MOBILE +MODA +MOE +MOI +MOM +MONASH +MONEY +MONSTER +MORMON +MORTGAGE +MOSCOW +MOTO +MOTORCYCLES +MOV +MOVIE +MP +MQ +MR +MS +MSD +MT +MTN +MTR +MU +MUSEUM +MUSIC +MV +MW +MX +MY +MZ +NA +NAB +NAGOYA +NAME +NAVY +NBA +NC +NE +NEC +NET +NETBANK +NETFLIX +NETWORK +NEUSTAR +NEW +NEWS +NEXT +NEXTDIRECT +NEXUS +NF +NFL +NG +NGO +NHK +NI +NICO +NIKE +NIKON +NINJA +NISSAN +NISSAY +NL +NO +NOKIA +NORTON +NOW +NOWRUZ +NOWTV +NP +NR +NRA +NRW +NTT +NU +NYC +NZ +OBI +OBSERVER +OFFICE +OKINAWA +OLAYAN +OLAYANGROUP +OLLO +OM +OMEGA +ONE +ONG +ONL +ONLINE +OOO +OPEN +ORACLE +ORANGE +ORG +ORGANIC +ORIGINS +OSAKA +OTSUKA +OTT +OVH +PA +PAGE +PANASONIC +PARIS +PARS +PARTNERS +PARTS +PARTY +PAY +PCCW +PE +PET +PF +PFIZER +PG +PH +PHARMACY +PHD +PHILIPS +PHONE +PHOTO +PHOTOGRAPHY +PHOTOS +PHYSIO +PICS +PICTET +PICTURES +PID +PIN +PING +PINK +PIONEER +PIZZA +PK +PL +PLACE +PLAY +PLAYSTATION +PLUMBING +PLUS +PM +PN +PNC +POHL +POKER +POLITIE +PORN +POST +PR +PRAMERICA +PRAXI +PRESS +PRIME +PRO +PROD +PRODUCTIONS +PROF +PROGRESSIVE +PROMO +PROPERTIES +PROPERTY +PROTECTION +PRU +PRUDENTIAL +PS +PT +PUB +PW +PWC +PY +QA +QPON +QUEBEC +QUEST +RACING +RADIO +RE +READ +REALESTATE +REALTOR +REALTY +RECIPES +RED +REDSTONE +REDUMBRELLA +REHAB +REISE +REISEN +REIT +RELIANCE +REN +RENT +RENTALS +REPAIR +REPORT +REPUBLICAN +REST +RESTAURANT +REVIEW +REVIEWS +REXROTH +RICH +RICHARDLI +RICOH +RIL +RIO +RIP +RO +ROCKS +RODEO +ROGERS +ROOM +RS +RSVP +RU +RUGBY +RUHR +RUN +RW +RWE +RYUKYU +SA +SAARLAND +SAFE +SAFETY +SAKURA +SALE +SALON +SAMSCLUB +SAMSUNG +SANDVIK +SANDVIKCOROMANT +SANOFI +SAP +SARL +SAS +SAVE +SAXO +SB +SBI +SBS +SC +SCB +SCHAEFFLER +SCHMIDT +SCHOLARSHIPS +SCHOOL +SCHULE +SCHWARZ +SCIENCE +SCOT +SD +SE +SEARCH +SEAT +SECURE +SECURITY +SEEK +SELECT +SENER +SERVICES +SEVEN +SEW +SEX +SEXY +SFR +SG +SH +SHANGRILA +SHARP +SHELL +SHIA +SHIKSHA +SHOES +SHOP +SHOPPING +SHOUJI +SHOW +SI +SILK +SINA +SINGLES +SITE +SJ +SK +SKI +SKIN +SKY +SKYPE +SL +SLING +SM +SMART +SMILE +SN +SNCF +SO +SOCCER +SOCIAL +SOFTBANK +SOFTWARE +SOHU +SOLAR +SOLUTIONS +SONG +SONY +SOY +SPA +SPACE +SPORT +SPOT +SR +SRL +SS +ST +STADA +STAPLES +STAR +STATEBANK +STATEFARM +STC +STCGROUP +STOCKHOLM +STORAGE +STORE +STREAM +STUDIO +STUDY +STYLE +SU +SUCKS +SUPPLIES +SUPPLY +SUPPORT +SURF +SURGERY +SUZUKI +SV +SWATCH +SWISS +SX +SY +SYDNEY +SYSTEMS +SZ +TAB +TAIPEI +TALK +TAOBAO +TARGET +TATAMOTORS +TATAR +TATTOO +TAX +TAXI +TC +TCI +TD +TDK +TEAM +TECH +TECHNOLOGY +TEL +TEMASEK +TENNIS +TEVA +TF +TG +TH +THD +THEATER +THEATRE +TIAA +TICKETS +TIENDA +TIPS +TIRES +TIROL +TJ +TJMAXX +TJX +TK +TKMAXX +TL +TM +TMALL +TN +TO +TODAY +TOKYO +TOOLS +TOP +TORAY +TOSHIBA +TOTAL +TOURS +TOWN +TOYOTA +TOYS +TR +TRADE +TRADING +TRAINING +TRAVEL +TRAVELERS +TRAVELERSINSURANCE +TRUST +TRV +TT +TUBE +TUI +TUNES +TUSHU +TV +TVS +TW +TZ +UA +UBANK +UBS +UG +UK +UNICOM +UNIVERSITY +UNO +UOL +UPS +US +UY +UZ +VA +VACATIONS +VANA +VANGUARD +VC +VE +VEGAS +VENTURES +VERISIGN +VERSICHERUNG +VET +VG +VI +VIAJES +VIDEO +VIG +VIKING +VILLAS +VIN +VIP +VIRGIN +VISA +VISION +VIVA +VIVO +VLAANDEREN +VN +VODKA +VOLVO +VOTE +VOTING +VOTO +VOYAGE +VU +WALES +WALMART +WALTER +WANG +WANGGOU +WATCH +WATCHES +WEATHER +WEATHERCHANNEL +WEBCAM +WEBER +WEBSITE +WED +WEDDING +WEIBO +WEIR +WF +WHOSWHO +WIEN +WIKI +WILLIAMHILL +WIN +WINDOWS +WINE +WINNERS +WME +WOLTERSKLUWER +WOODSIDE +WORK +WORKS +WORLD +WOW +WS +WTC +WTF +XBOX +XEROX +XIHUAN +XIN +XN +XXX +XYZ +YACHTS +YAHOO +YAMAXUN +YANDEX +YE +YODOBASHI +YOGA +YOKOHAMA +YOU +YOUTUBE +YT +YUN +ZA +ZAPPOS +ZARA +ZERO +ZIP +ZM +ZONE +ZUERICH +ZW \ No newline at end of file diff --git a/phileas-core/src/test/java/ai/philterd/test/phileas/services/filters/EmailAddressFilterTest.java b/phileas-core/src/test/java/ai/philterd/test/phileas/services/filters/EmailAddressFilterTest.java index 6edebb992..8b9305c73 100644 --- a/phileas-core/src/test/java/ai/philterd/test/phileas/services/filters/EmailAddressFilterTest.java +++ b/phileas-core/src/test/java/ai/philterd/test/phileas/services/filters/EmailAddressFilterTest.java @@ -18,6 +18,7 @@ import ai.philterd.phileas.model.enums.FilterType; import ai.philterd.phileas.model.filter.FilterConfiguration; import ai.philterd.phileas.model.objects.FilterResult; +import ai.philterd.phileas.model.policy.Policy; import ai.philterd.phileas.model.policy.filters.strategies.rules.EmailAddressFilterStrategy; import ai.philterd.phileas.model.services.AlertService; import ai.philterd.phileas.services.anonymization.AlphanumericAnonymizationService; @@ -27,7 +28,6 @@ import org.junit.jupiter.api.Test; import org.mockito.Mockito; -import java.util.Arrays; import java.util.List; public class EmailAddressFilterTest extends AbstractFilterTest { @@ -35,7 +35,7 @@ public class EmailAddressFilterTest extends AbstractFilterTest { private final AlertService alertService = Mockito.mock(AlertService.class); @Test - public void filterEmail() throws Exception { + public void filterEmailStrict() throws Exception { final FilterConfiguration filterConfiguration = new FilterConfiguration.FilterConfigurationBuilder() .withStrategies(List.of(new EmailAddressFilterStrategy())) @@ -44,7 +44,35 @@ public void filterEmail() throws Exception { .withWindowSize(windowSize) .build(); - final EmailAddressFilter filter = new EmailAddressFilter(filterConfiguration); + filterEmails(filterConfiguration, true, false); + + } + + @Test + public void filterEmailRelaxed() throws Exception { + + final FilterConfiguration filterConfiguration = new FilterConfiguration.FilterConfigurationBuilder() + .withStrategies(List.of(new EmailAddressFilterStrategy())) + .withAlertService(alertService) + .withAnonymizationService(new AlphanumericAnonymizationService(new LocalAnonymizationCacheService())) + .withWindowSize(windowSize) + .build(); + + filterEmails(filterConfiguration, false, false); + + } + + @Test + public void filterEmailOnlyValidTLDs() throws Exception { + + final FilterConfiguration filterConfiguration = new FilterConfiguration.FilterConfigurationBuilder() + .withStrategies(List.of(new EmailAddressFilterStrategy())) + .withAlertService(alertService) + .withAnonymizationService(new AlphanumericAnonymizationService(new LocalAnonymizationCacheService())) + .withWindowSize(windowSize) + .build(); + + final EmailAddressFilter filter = new EmailAddressFilter(filterConfiguration, true, true); final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "my email is none@none.com.", attributes); Assertions.assertEquals(1, filterResult.getSpans().size()); @@ -53,4 +81,107 @@ public void filterEmail() throws Exception { } + @Test + public void filterEmailOnlyInvalidTLDs() throws Exception { + + final FilterConfiguration filterConfiguration = new FilterConfiguration.FilterConfigurationBuilder() + .withStrategies(List.of(new EmailAddressFilterStrategy())) + .withAlertService(alertService) + .withAnonymizationService(new AlphanumericAnonymizationService(new LocalAnonymizationCacheService())) + .withWindowSize(windowSize) + .build(); + + final EmailAddressFilter filter = new EmailAddressFilter(filterConfiguration, true, true); + + final FilterResult filterResult1 = filter.filter(getPolicy(), "context", "documentid", PIECE, "my email is none@none.codfm.", attributes); + Assertions.assertEquals(0, filterResult1.getSpans().size()); + + final FilterResult filterResult2 = filter.filter(getPolicy(), "context", "documentid", PIECE, "my email is none@none.com.dmf.", attributes); + Assertions.assertEquals(0, filterResult2.getSpans().size()); + + final FilterResult filterResult3 = filter.filter(getPolicy(), "context", "documentid", PIECE, "my email is none@none.cob", attributes); + Assertions.assertEquals(0, filterResult3.getSpans().size()); + + } + + @Test + public void filterEmailOnlyInvalidTLDsWithNoStrictMatches() throws Exception { + + final FilterConfiguration filterConfiguration = new FilterConfiguration.FilterConfigurationBuilder() + .withStrategies(List.of(new EmailAddressFilterStrategy())) + .withAlertService(alertService) + .withAnonymizationService(new AlphanumericAnonymizationService(new LocalAnonymizationCacheService())) + .withWindowSize(windowSize) + .build(); + + final EmailAddressFilter filter = new EmailAddressFilter(filterConfiguration, false, true); + + final FilterResult filterResult4 = filter.filter(getPolicy(), "context", "documentid", PIECE, "my email is none@lb.co_m", attributes); + showSpans(filterResult4.getSpans()); + Assertions.assertEquals(0, filterResult4.getSpans().size()); + + } + + private void filterEmails(FilterConfiguration filterConfiguration, boolean onlyStrictMatches, boolean onlyValidTLDs) throws Exception { + + final String cxt = "context"; + final String doc = "documentid"; + final EmailAddressFilter filter = new EmailAddressFilter(filterConfiguration, onlyStrictMatches, onlyValidTLDs); + final Policy policy = getPolicy(); + + final FilterResult filterResult = filter.filter(policy, cxt, doc, PIECE, "my email is none@none.com.", attributes); + Assertions.assertEquals(1, filterResult.getSpans().size()); + Assertions.assertTrue(checkSpan(filterResult.getSpans().get(0), 12, 25, FilterType.EMAIL_ADDRESS)); + Assertions.assertEquals("none@none.com", filterResult.getSpans().get(0).getText()); + + // 👇 cases adapted from https://www.tumblr.com/codefool/15288874550/list-of-valid-and-invalid-email-addresses + + // valid email addresses + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "firstname.lastname@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "email@subdomain.example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "firstname+lastname@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "email@123.123.123.123", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "1234567890@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "email@example-one.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "_______@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "email@example.name", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "email@example.museum", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "email@example.co.jp", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "firstname-lastname@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "very.unusual.“@”.unusual.com@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "very.“(),:;<>[]”.VERY.“very@\\\\ \"very”.unusual@strange.example.com", attributes).getSpans().size()); + //Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "“email”@example.com", attributes).getSpans().size()); // todo include quotes + //Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "much.“more\\ unusual”@example.com", attributes).getSpans().size()); // todo include quotes + + // valid email addresses only detected with strict matching + Assertions.assertEquals(onlyStrictMatches ? 1 : 0, filter.filter(policy, cxt, doc, PIECE, "email@[123.123.123.123]", attributes).getSpans().size()); + + // invalid email addresses + Assertions.assertEquals(0, filter.filter(policy, cxt, doc, PIECE, "plainaddress", attributes).getSpans().size()); + Assertions.assertEquals(0, filter.filter(policy, cxt, doc, PIECE, "#@%^%#$@#$@#.com", attributes).getSpans().size()); + Assertions.assertEquals(0, filter.filter(policy, cxt, doc, PIECE, "@example.com", attributes).getSpans().size()); + Assertions.assertEquals(0, filter.filter(policy, cxt, doc, PIECE, "email.example.com", attributes).getSpans().size()); + Assertions.assertEquals(0, filter.filter(policy, cxt, doc, PIECE, "あいうえお@example.com", attributes).getSpans().size()); + Assertions.assertEquals(0, filter.filter(policy, cxt, doc, PIECE, "email@example", attributes).getSpans().size()); + Assertions.assertEquals(0, filter.filter(policy, cxt, doc, PIECE, "email@example..com", attributes).getSpans().size()); + Assertions.assertEquals(0, filter.filter(policy, cxt, doc, PIECE, "“(),:;<>[\\]@example.com", attributes).getSpans().size()); + //Assertions.assertEquals(0, filter.filter(policy, cxt, doc, PIECE, "email@example.web", attributes).getSpans().size()); // todo detect invalid TLD + //Assertions.assertEquals(0, filter.filter(policy, cxt, doc, PIECE, "email@111.222.333.44444", attributes).getSpans().size()); // todo detect invalid TLD + + // invalid email addresses only rejected with strict matching + Assertions.assertEquals(onlyStrictMatches ? 0 : 1, filter.filter(policy, cxt, doc, PIECE, "email.@example.com", attributes).getSpans().size()); + Assertions.assertEquals(onlyStrictMatches ? 0 : 1, filter.filter(policy, cxt, doc, PIECE, "email@-example.com", attributes).getSpans().size()); + + // valid partial matches against invalid email addresses + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "Joe Smith ", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "email@example@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, ".email@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "email..email@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "email@example.com (Joe Smith)", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "Abc..123@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "just\"not\"right@example.com", attributes).getSpans().size()); + Assertions.assertEquals(1, filter.filter(policy, cxt, doc, PIECE, "this\\ is\"really\"not\\allowed@example.com", attributes).getSpans().size()); + + } + } diff --git a/phileas-core/src/test/resources/tlds-alpha-by-domain.txt b/phileas-core/src/test/resources/tlds-alpha-by-domain.txt new file mode 100644 index 000000000..fddc07b80 --- /dev/null +++ b/phileas-core/src/test/resources/tlds-alpha-by-domain.txt @@ -0,0 +1,1298 @@ +# From https://data.iana.org/TLD/tlds-alpha-by-domain.txt +# Version 2024082500, Last Updated Sun Aug 25 07:07:01 2024 UTC +AAA +AARP +ABB +ABBOTT +ABBVIE +ABC +ABLE +ABOGADO +ABUDHABI +AC +ACADEMY +ACCENTURE +ACCOUNTANT +ACCOUNTANTS +ACO +ACTOR +AD +ADS +ADULT +AE +AEG +AERO +AETNA +AF +AFL +AFRICA +AG +AGAKHAN +AGENCY +AI +AIG +AIRBUS +AIRFORCE +AIRTEL +AKDN +AL +ALIBABA +ALIPAY +ALLFINANZ +ALLSTATE +ALLY +ALSACE +ALSTOM +AM +AMAZON +AMERICANEXPRESS +AMERICANFAMILY +AMEX +AMFAM +AMICA +AMSTERDAM +ANALYTICS +ANDROID +ANQUAN +ANZ +AO +AOL +APARTMENTS +APP +APPLE +AQ +AQUARELLE +AR +ARAB +ARAMCO +ARCHI +ARMY +ARPA +ART +ARTE +AS +ASDA +ASIA +ASSOCIATES +AT +ATHLETA +ATTORNEY +AU +AUCTION +AUDI +AUDIBLE +AUDIO +AUSPOST +AUTHOR +AUTO +AUTOS +AW +AWS +AX +AXA +AZ +AZURE +BA +BABY +BAIDU +BANAMEX +BAND +BANK +BAR +BARCELONA +BARCLAYCARD +BARCLAYS +BAREFOOT +BARGAINS +BASEBALL +BASKETBALL +BAUHAUS +BAYERN +BB +BBC +BBT +BBVA +BCG +BCN +BD +BE +BEATS +BEAUTY +BEER +BENTLEY +BERLIN +BEST +BESTBUY +BET +BF +BG +BH +BHARTI +BI +BIBLE +BID +BIKE +BING +BINGO +BIO +BIZ +BJ +BLACK +BLACKFRIDAY +BLOCKBUSTER +BLOG +BLOOMBERG +BLUE +BM +BMS +BMW +BN +BNPPARIBAS +BO +BOATS +BOEHRINGER +BOFA +BOM +BOND +BOO +BOOK +BOOKING +BOSCH +BOSTIK +BOSTON +BOT +BOUTIQUE +BOX +BR +BRADESCO +BRIDGESTONE +BROADWAY +BROKER +BROTHER +BRUSSELS +BS +BT +BUILD +BUILDERS +BUSINESS +BUY +BUZZ +BV +BW +BY +BZ +BZH +CA +CAB +CAFE +CAL +CALL +CALVINKLEIN +CAM +CAMERA +CAMP +CANON +CAPETOWN +CAPITAL +CAPITALONE +CAR +CARAVAN +CARDS +CARE +CAREER +CAREERS +CARS +CASA +CASE +CASH +CASINO +CAT +CATERING +CATHOLIC +CBA +CBN +CBRE +CC +CD +CENTER +CEO +CERN +CF +CFA +CFD +CG +CH +CHANEL +CHANNEL +CHARITY +CHASE +CHAT +CHEAP +CHINTAI +CHRISTMAS +CHROME +CHURCH +CI +CIPRIANI +CIRCLE +CISCO +CITADEL +CITI +CITIC +CITY +CK +CL +CLAIMS +CLEANING +CLICK +CLINIC +CLINIQUE +CLOTHING +CLOUD +CLUB +CLUBMED +CM +CN +CO +COACH +CODES +COFFEE +COLLEGE +COLOGNE +COM +COMMBANK +COMMUNITY +COMPANY +COMPARE +COMPUTER +COMSEC +CONDOS +CONSTRUCTION +CONSULTING +CONTACT +CONTRACTORS +COOKING +COOL +COOP +CORSICA +COUNTRY +COUPON +COUPONS +COURSES +CPA +CR +CREDIT +CREDITCARD +CREDITUNION +CRICKET +CROWN +CRS +CRUISE +CRUISES +CU +CUISINELLA +CV +CW +CX +CY +CYMRU +CYOU +CZ +DABUR +DAD +DANCE +DATA +DATE +DATING +DATSUN +DAY +DCLK +DDS +DE +DEAL +DEALER +DEALS +DEGREE +DELIVERY +DELL +DELOITTE +DELTA +DEMOCRAT +DENTAL +DENTIST +DESI +DESIGN +DEV +DHL +DIAMONDS +DIET +DIGITAL +DIRECT +DIRECTORY +DISCOUNT +DISCOVER +DISH +DIY +DJ +DK +DM +DNP +DO +DOCS +DOCTOR +DOG +DOMAINS +DOT +DOWNLOAD +DRIVE +DTV +DUBAI +DUNLOP +DUPONT +DURBAN +DVAG +DVR +DZ +EARTH +EAT +EC +ECO +EDEKA +EDU +EDUCATION +EE +EG +EMAIL +EMERCK +ENERGY +ENGINEER +ENGINEERING +ENTERPRISES +EPSON +EQUIPMENT +ER +ERICSSON +ERNI +ES +ESQ +ESTATE +ET +EU +EUROVISION +EUS +EVENTS +EXCHANGE +EXPERT +EXPOSED +EXPRESS +EXTRASPACE +FAGE +FAIL +FAIRWINDS +FAITH +FAMILY +FAN +FANS +FARM +FARMERS +FASHION +FAST +FEDEX +FEEDBACK +FERRARI +FERRERO +FI +FIDELITY +FIDO +FILM +FINAL +FINANCE +FINANCIAL +FIRE +FIRESTONE +FIRMDALE +FISH +FISHING +FIT +FITNESS +FJ +FK +FLICKR +FLIGHTS +FLIR +FLORIST +FLOWERS +FLY +FM +FO +FOO +FOOD +FOOTBALL +FORD +FOREX +FORSALE +FORUM +FOUNDATION +FOX +FR +FREE +FRESENIUS +FRL +FROGANS +FRONTIER +FTR +FUJITSU +FUN +FUND +FURNITURE +FUTBOL +FYI +GA +GAL +GALLERY +GALLO +GALLUP +GAME +GAMES +GAP +GARDEN +GAY +GB +GBIZ +GD +GDN +GE +GEA +GENT +GENTING +GEORGE +GF +GG +GGEE +GH +GI +GIFT +GIFTS +GIVES +GIVING +GL +GLASS +GLE +GLOBAL +GLOBO +GM +GMAIL +GMBH +GMO +GMX +GN +GODADDY +GOLD +GOLDPOINT +GOLF +GOO +GOODYEAR +GOOG +GOOGLE +GOP +GOT +GOV +GP +GQ +GR +GRAINGER +GRAPHICS +GRATIS +GREEN +GRIPE +GROCERY +GROUP +GS +GT +GU +GUCCI +GUGE +GUIDE +GUITARS +GURU +GW +GY +HAIR +HAMBURG +HANGOUT +HAUS +HBO +HDFC +HDFCBANK +HEALTH +HEALTHCARE +HELP +HELSINKI +HERE +HERMES +HIPHOP +HISAMITSU +HITACHI +HIV +HK +HKT +HM +HN +HOCKEY +HOLDINGS +HOLIDAY +HOMEDEPOT +HOMEGOODS +HOMES +HOMESENSE +HONDA +HORSE +HOSPITAL +HOST +HOSTING +HOT +HOTELS +HOTMAIL +HOUSE +HOW +HR +HSBC +HT +HU +HUGHES +HYATT +HYUNDAI +IBM +ICBC +ICE +ICU +ID +IE +IEEE +IFM +IKANO +IL +IM +IMAMAT +IMDB +IMMO +IMMOBILIEN +IN +INC +INDUSTRIES +INFINITI +INFO +ING +INK +INSTITUTE +INSURANCE +INSURE +INT +INTERNATIONAL +INTUIT +INVESTMENTS +IO +IPIRANGA +IQ +IR +IRISH +IS +ISMAILI +IST +ISTANBUL +IT +ITAU +ITV +JAGUAR +JAVA +JCB +JE +JEEP +JETZT +JEWELRY +JIO +JLL +JM +JMP +JNJ +JO +JOBS +JOBURG +JOT +JOY +JP +JPMORGAN +JPRS +JUEGOS +JUNIPER +KAUFEN +KDDI +KE +KERRYHOTELS +KERRYLOGISTICS +KERRYPROPERTIES +KFH +KG +KH +KI +KIA +KIDS +KIM +KINDLE +KITCHEN +KIWI +KM +KN +KOELN +KOMATSU +KOSHER +KP +KPMG +KPN +KR +KRD +KRED +KUOKGROUP +KW +KY +KYOTO +KZ +LA +LACAIXA +LAMBORGHINI +LAMER +LANCASTER +LAND +LANDROVER +LANXESS +LASALLE +LAT +LATINO +LATROBE +LAW +LAWYER +LB +LC +LDS +LEASE +LECLERC +LEFRAK +LEGAL +LEGO +LEXUS +LGBT +LI +LIDL +LIFE +LIFEINSURANCE +LIFESTYLE +LIGHTING +LIKE +LILLY +LIMITED +LIMO +LINCOLN +LINK +LIPSY +LIVE +LIVING +LK +LLC +LLP +LOAN +LOANS +LOCKER +LOCUS +LOL +LONDON +LOTTE +LOTTO +LOVE +LPL +LPLFINANCIAL +LR +LS +LT +LTD +LTDA +LU +LUNDBECK +LUXE +LUXURY +LV +LY +MA +MADRID +MAIF +MAISON +MAKEUP +MAN +MANAGEMENT +MANGO +MAP +MARKET +MARKETING +MARKETS +MARRIOTT +MARSHALLS +MATTEL +MBA +MC +MCKINSEY +MD +ME +MED +MEDIA +MEET +MELBOURNE +MEME +MEMORIAL +MEN +MENU +MERCKMSD +MG +MH +MIAMI +MICROSOFT +MIL +MINI +MINT +MIT +MITSUBISHI +MK +ML +MLB +MLS +MM +MMA +MN +MO +MOBI +MOBILE +MODA +MOE +MOI +MOM +MONASH +MONEY +MONSTER +MORMON +MORTGAGE +MOSCOW +MOTO +MOTORCYCLES +MOV +MOVIE +MP +MQ +MR +MS +MSD +MT +MTN +MTR +MU +MUSEUM +MUSIC +MV +MW +MX +MY +MZ +NA +NAB +NAGOYA +NAME +NAVY +NBA +NC +NE +NEC +NET +NETBANK +NETFLIX +NETWORK +NEUSTAR +NEW +NEWS +NEXT +NEXTDIRECT +NEXUS +NF +NFL +NG +NGO +NHK +NI +NICO +NIKE +NIKON +NINJA +NISSAN +NISSAY +NL +NO +NOKIA +NORTON +NOW +NOWRUZ +NOWTV +NP +NR +NRA +NRW +NTT +NU +NYC +NZ +OBI +OBSERVER +OFFICE +OKINAWA +OLAYAN +OLAYANGROUP +OLLO +OM +OMEGA +ONE +ONG +ONL +ONLINE +OOO +OPEN +ORACLE +ORANGE +ORG +ORGANIC +ORIGINS +OSAKA +OTSUKA +OTT +OVH +PA +PAGE +PANASONIC +PARIS +PARS +PARTNERS +PARTS +PARTY +PAY +PCCW +PE +PET +PF +PFIZER +PG +PH +PHARMACY +PHD +PHILIPS +PHONE +PHOTO +PHOTOGRAPHY +PHOTOS +PHYSIO +PICS +PICTET +PICTURES +PID +PIN +PING +PINK +PIONEER +PIZZA +PK +PL +PLACE +PLAY +PLAYSTATION +PLUMBING +PLUS +PM +PN +PNC +POHL +POKER +POLITIE +PORN +POST +PR +PRAMERICA +PRAXI +PRESS +PRIME +PRO +PROD +PRODUCTIONS +PROF +PROGRESSIVE +PROMO +PROPERTIES +PROPERTY +PROTECTION +PRU +PRUDENTIAL +PS +PT +PUB +PW +PWC +PY +QA +QPON +QUEBEC +QUEST +RACING +RADIO +RE +READ +REALESTATE +REALTOR +REALTY +RECIPES +RED +REDSTONE +REDUMBRELLA +REHAB +REISE +REISEN +REIT +RELIANCE +REN +RENT +RENTALS +REPAIR +REPORT +REPUBLICAN +REST +RESTAURANT +REVIEW +REVIEWS +REXROTH +RICH +RICHARDLI +RICOH +RIL +RIO +RIP +RO +ROCKS +RODEO +ROGERS +ROOM +RS +RSVP +RU +RUGBY +RUHR +RUN +RW +RWE +RYUKYU +SA +SAARLAND +SAFE +SAFETY +SAKURA +SALE +SALON +SAMSCLUB +SAMSUNG +SANDVIK +SANDVIKCOROMANT +SANOFI +SAP +SARL +SAS +SAVE +SAXO +SB +SBI +SBS +SC +SCB +SCHAEFFLER +SCHMIDT +SCHOLARSHIPS +SCHOOL +SCHULE +SCHWARZ +SCIENCE +SCOT +SD +SE +SEARCH +SEAT +SECURE +SECURITY +SEEK +SELECT +SENER +SERVICES +SEVEN +SEW +SEX +SEXY +SFR +SG +SH +SHANGRILA +SHARP +SHELL +SHIA +SHIKSHA +SHOES +SHOP +SHOPPING +SHOUJI +SHOW +SI +SILK +SINA +SINGLES +SITE +SJ +SK +SKI +SKIN +SKY +SKYPE +SL +SLING +SM +SMART +SMILE +SN +SNCF +SO +SOCCER +SOCIAL +SOFTBANK +SOFTWARE +SOHU +SOLAR +SOLUTIONS +SONG +SONY +SOY +SPA +SPACE +SPORT +SPOT +SR +SRL +SS +ST +STADA +STAPLES +STAR +STATEBANK +STATEFARM +STC +STCGROUP +STOCKHOLM +STORAGE +STORE +STREAM +STUDIO +STUDY +STYLE +SU +SUCKS +SUPPLIES +SUPPLY +SUPPORT +SURF +SURGERY +SUZUKI +SV +SWATCH +SWISS +SX +SY +SYDNEY +SYSTEMS +SZ +TAB +TAIPEI +TALK +TAOBAO +TARGET +TATAMOTORS +TATAR +TATTOO +TAX +TAXI +TC +TCI +TD +TDK +TEAM +TECH +TECHNOLOGY +TEL +TEMASEK +TENNIS +TEVA +TF +TG +TH +THD +THEATER +THEATRE +TIAA +TICKETS +TIENDA +TIPS +TIRES +TIROL +TJ +TJMAXX +TJX +TK +TKMAXX +TL +TM +TMALL +TN +TO +TODAY +TOKYO +TOOLS +TOP +TORAY +TOSHIBA +TOTAL +TOURS +TOWN +TOYOTA +TOYS +TR +TRADE +TRADING +TRAINING +TRAVEL +TRAVELERS +TRAVELERSINSURANCE +TRUST +TRV +TT +TUBE +TUI +TUNES +TUSHU +TV +TVS +TW +TZ +UA +UBANK +UBS +UG +UK +UNICOM +UNIVERSITY +UNO +UOL +UPS +US +UY +UZ +VA +VACATIONS +VANA +VANGUARD +VC +VE +VEGAS +VENTURES +VERISIGN +VERSICHERUNG +VET +VG +VI +VIAJES +VIDEO +VIG +VIKING +VILLAS +VIN +VIP +VIRGIN +VISA +VISION +VIVA +VIVO +VLAANDEREN +VN +VODKA +VOLVO +VOTE +VOTING +VOTO +VOYAGE +VU +WALES +WALMART +WALTER +WANG +WANGGOU +WATCH +WATCHES +WEATHER +WEATHERCHANNEL +WEBCAM +WEBER +WEBSITE +WED +WEDDING +WEIBO +WEIR +WF +WHOSWHO +WIEN +WIKI +WILLIAMHILL +WIN +WINDOWS +WINE +WINNERS +WME +WOLTERSKLUWER +WOODSIDE +WORK +WORKS +WORLD +WOW +WS +WTC +WTF +XBOX +XEROX +XIHUAN +XIN +XN +XXX +XYZ +YACHTS +YAHOO +YAMAXUN +YANDEX +YE +YODOBASHI +YOGA +YOKOHAMA +YOU +YOUTUBE +YT +YUN +ZA +ZAPPOS +ZARA +ZERO +ZIP +ZM +ZONE +ZUERICH +ZW \ No newline at end of file diff --git a/phileas-model/src/main/java/ai/philterd/phileas/model/policy/filters/EmailAddress.java b/phileas-model/src/main/java/ai/philterd/phileas/model/policy/filters/EmailAddress.java index a6b3ee4fe..92bcb28c0 100644 --- a/phileas-model/src/main/java/ai/philterd/phileas/model/policy/filters/EmailAddress.java +++ b/phileas-model/src/main/java/ai/philterd/phileas/model/policy/filters/EmailAddress.java @@ -23,6 +23,14 @@ public class EmailAddress extends AbstractFilter { + @SerializedName("onlyStrictMatches") + @Expose + protected boolean onlyStrictMatches = true; + + @SerializedName("onlyValidTLDs") + @Expose + protected boolean onlyValidTLDs = false; + @SerializedName("emailAddressFilterStrategies") @Expose private List emailAddressFilterStrategies; @@ -35,4 +43,20 @@ public void setEmailAddressFilterStrategies(List ema this.emailAddressFilterStrategies = emailAddressFilterStrategies; } + public boolean isOnlyStrictMatches() { + return onlyStrictMatches; + } + + public void setOnlyStrictMatches(boolean value) { + onlyStrictMatches = value; + } + + public boolean isOnlyValidTLDs() { + return onlyValidTLDs; + } + + public void setOnlyValidTLDs(boolean onlyValidTLDs) { + this.onlyValidTLDs = onlyValidTLDs; + } + } \ No newline at end of file