From 0e7fda0cae47c4dbc9f215f801d0bc585242f550 Mon Sep 17 00:00:00 2001 From: hlgp Date: Tue, 1 Oct 2024 23:57:08 +0000 Subject: [PATCH 1/8] detect wildcard patterns we may need to handle at eval_only time --- .../data/normalizer/NumberNormalizer.java | 6 +- .../data/normalizer/ZeroRegexStatus.java | 5 ++ .../normalizer/regex/NumericRegexEncoder.java | 15 +++++ .../data/normalizer/regex/RegexUtils.java | 35 ++++++++++++ .../normalizer/regex/visitor/ZeroTrimmer.java | 57 +++++++++++++++++++ .../regex/visitor/ZeroTrimmerTest.java | 38 +++++++++++++ 6 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 src/main/java/datawave/data/normalizer/ZeroRegexStatus.java diff --git a/src/main/java/datawave/data/normalizer/NumberNormalizer.java b/src/main/java/datawave/data/normalizer/NumberNormalizer.java index 13bac1e..0a75651 100644 --- a/src/main/java/datawave/data/normalizer/NumberNormalizer.java +++ b/src/main/java/datawave/data/normalizer/NumberNormalizer.java @@ -29,7 +29,7 @@ public String normalize(String fv) { } /** - * We cannot support regex against numbers + * We can support regex against numbers. */ public String normalizeRegex(String fieldRegex) { try { @@ -40,6 +40,10 @@ public String normalizeRegex(String fieldRegex) { } } + public ZeroRegexStatus getZeroRegexStatus(String untrimmedRegex) { + return NumericRegexEncoder.getZeroRegexStatus(untrimmedRegex); + } + @Override public String normalizeDelegateType(BigDecimal delegateIn) { return normalize(delegateIn.toString()); diff --git a/src/main/java/datawave/data/normalizer/ZeroRegexStatus.java b/src/main/java/datawave/data/normalizer/ZeroRegexStatus.java new file mode 100644 index 0000000..2e6f43a --- /dev/null +++ b/src/main/java/datawave/data/normalizer/ZeroRegexStatus.java @@ -0,0 +1,5 @@ +package datawave.data.normalizer; + +public enum ZeroRegexStatus { + LEADING, TRAILING, NONE +} diff --git a/src/main/java/datawave/data/normalizer/regex/NumericRegexEncoder.java b/src/main/java/datawave/data/normalizer/regex/NumericRegexEncoder.java index 72e2142..21e1ac2 100644 --- a/src/main/java/datawave/data/normalizer/regex/NumericRegexEncoder.java +++ b/src/main/java/datawave/data/normalizer/regex/NumericRegexEncoder.java @@ -8,6 +8,7 @@ import com.google.common.base.CharMatcher; +import datawave.data.normalizer.ZeroRegexStatus; import datawave.data.normalizer.regex.visitor.AlternationDeduper; import datawave.data.normalizer.regex.visitor.AnchorTrimmer; import datawave.data.normalizer.regex.visitor.DecimalPointPlacer; @@ -143,6 +144,10 @@ private NumericRegexEncoder(String pattern) { this.pattern = pattern; } + public static ZeroRegexStatus getZeroRegexStatus(String regex) { + return ZeroTrimmer.getStatus(RegexParser.parse(regex).getChildren()); + } + private String encode() { if (log.isDebugEnabled()) { log.debug("Encoding pattern " + pattern); @@ -316,6 +321,16 @@ private void encodePatternTree() { dedupe(); } + private void encodeWithoutTrimming() { + dedupe(); + encodeSimpleNumbers(); + // If there are no more unencoded sub-patterns in the tree after encoding simple numbers, no further work needs to be done. + if (!moreToEncode()) { + return; + } + addExponentialBins(); + } + /** * Parse the pattern to a node tree. */ diff --git a/src/main/java/datawave/data/normalizer/regex/RegexUtils.java b/src/main/java/datawave/data/normalizer/regex/RegexUtils.java index ed26b38..2fdd487 100644 --- a/src/main/java/datawave/data/normalizer/regex/RegexUtils.java +++ b/src/main/java/datawave/data/normalizer/regex/RegexUtils.java @@ -345,6 +345,24 @@ public static boolean matchesChar(Node node, char character) { } } + public static boolean groupNodeMatches(Node node, char character) { + GroupNode group = (GroupNode) node; + boolean matchFound = false; + + for (Node child : group.getChildren()) { + // If the current child is a single character, see if it is a match for the character. + if (child instanceof SingleCharNode) { + if (isChar(child, character)) { + matchFound = true; + } else { + // A character other than the target was found, but there may be more in the group + continue; + } + } + } + return matchFound; + } + /** * Return whether the given node is a regex element that can only match against the given character. * @@ -374,6 +392,23 @@ public static boolean matchesZero(Node node) { return matchesChar(node, RegexConstants.ZERO); } + public static boolean matchesCharExplicitly(Node node, char character) { + switch (node.getType()) { + case SINGLE_CHAR: + return isChar(node, character); + case CHAR_CLASS: + return charClassMatches(node, character); + case GROUP: + return groupNodeMatches(node, character); + default: + return false; + } + } + + public static boolean matchesZeroExplicitly(Node node) { + return matchesCharExplicitly(node, RegexConstants.ZERO); + } + /** * Return whether the given node is a regex element that can only match against the character '0'. * diff --git a/src/main/java/datawave/data/normalizer/regex/visitor/ZeroTrimmer.java b/src/main/java/datawave/data/normalizer/regex/visitor/ZeroTrimmer.java index dcde159..ce740db 100644 --- a/src/main/java/datawave/data/normalizer/regex/visitor/ZeroTrimmer.java +++ b/src/main/java/datawave/data/normalizer/regex/visitor/ZeroTrimmer.java @@ -6,8 +6,11 @@ import org.apache.commons.lang3.tuple.Pair; +import datawave.data.normalizer.ZeroRegexStatus; import datawave.data.normalizer.regex.AnyCharNode; import datawave.data.normalizer.regex.EncodedPatternNode; +import datawave.data.normalizer.regex.EscapedSingleCharNode; +import datawave.data.normalizer.regex.ExpressionNode; import datawave.data.normalizer.regex.GroupNode; import datawave.data.normalizer.regex.IntegerNode; import datawave.data.normalizer.regex.IntegerRangeNode; @@ -42,6 +45,60 @@ public static Node trim(Node node) { return (Node) node.accept(visitor, null); } + public static ZeroRegexStatus getStatus(List encodedRegexNodes) { + if (hasPossiblyLeadingZeroes(encodedRegexNodes)) { + return ZeroRegexStatus.LEADING; + } else if (hasTrailingZeroes(encodedRegexNodes)) { + return ZeroRegexStatus.TRAILING; + } else + return ZeroRegexStatus.NONE; + + } + + private static boolean hasTrailingZeroes(List encodedRegexNodes) { + Collections.reverse(encodedRegexNodes); + + NodeListIterator iter = new NodeListIterator(encodedRegexNodes); + + while (iter.hasNext()) { + iter.seekPastQuantifiers(); + iter.seekPastQuestionMarks(); + + Node next = iter.peekNext(); + + if (RegexUtils.matchesZero(next)) { + if (RegexUtils.matchesZeroExplicitly(next)) { + return true; + } + iter.next(); + } else { + return false; + } + + } + return true; + + } + + private static boolean hasPossiblyLeadingZeroes(List encodedRegexNodes) { + NodeListIterator iter = new NodeListIterator(encodedRegexNodes); + + while (iter.hasNext()) { + Node next = iter.peekNext(); + + if (RegexUtils.matchesZero(next)) { + return true; + } else if (RegexUtils.isChar(next, RegexConstants.HYPHEN) || next.equals(new EscapedSingleCharNode(RegexConstants.PERIOD))) { + iter.next(); + } else { + return false; + } + } + + return true; + + } + @Override public Object visitEncodedPattern(EncodedPatternNode node, Object data) { EncodedPatternNode trimmed = new EncodedPatternNode(); diff --git a/src/test/java/datawave/data/normalizer/regex/visitor/ZeroTrimmerTest.java b/src/test/java/datawave/data/normalizer/regex/visitor/ZeroTrimmerTest.java index 0f89650..680dd31 100644 --- a/src/test/java/datawave/data/normalizer/regex/visitor/ZeroTrimmerTest.java +++ b/src/test/java/datawave/data/normalizer/regex/visitor/ZeroTrimmerTest.java @@ -5,8 +5,11 @@ import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; +import org.locationtech.jts.util.Assert; +import datawave.data.normalizer.ZeroRegexStatus; import datawave.data.normalizer.regex.Node; +import datawave.data.normalizer.regex.RegexParser; class ZeroTrimmerTest { @@ -284,6 +287,8 @@ void testNoLeadingOrTrailingZeros() { assertTrimmedTo("45.*", "\\+[b-z]E45.*"); assertTrimmedTo("300454.*", "\\+[f-z]E300454.*"); assertTrimmedTo("300.*0003", "\\+[c-z]E300.*0003"); + assertTrimmedTo("300.*000[1-9]", "\\+[c-z]E300.*000[1-9]"); + } @Test @@ -299,6 +304,35 @@ void testSingleElementPatterns() { assertTrimmedTo("\\d{3}", "\\+[a-c]E\\d{3}"); } + @Test + void testStatus() { + // TODO: more test cases + + ZeroRegexStatus status = ZeroRegexStatus.NONE; + assertStatus("300.*0003", status); + assertStatus("300.*000[1-9]", status); + assertStatus("45.*", status); + assertStatus("-45.*", status); + + status = ZeroRegexStatus.LEADING; + assertStatus(".*?", status); + assertStatus(".*?11", status); + assertStatus("[04][05][06]", status); + assertStatus("[04]{1,3}[05][06]", status); + assertStatus("\\d{3}", status); + assertStatus(".\\.000034.*", status); + assertStatus("00345.*", status); + assertStatus("\\.000034.*", status); + assertStatus("-00345.*", status); + + status = ZeroRegexStatus.TRAILING; + assertStatus("3.*0{0,}[01]", status); + assertStatus("3400\\.0000.", status); + assertStatus("340.*", status); + assertStatus("3400{3}0{2}", status); + + } + @Test void testTrailingZerosWithoutQuantifiers() { assertTrimmedTo(".*34300", "\\+[e-zA-Z]E.*343"); @@ -314,6 +348,10 @@ void testMixedAlternation() { assertTrimmedTo("234\\.45|343.*|0\\.00[0]34.*", "\\+cE2\\.3445|\\+[c-z]E343.*|\\+WE34.*"); } + private void assertStatus(String pattern, ZeroRegexStatus status) { + Assert.equals(ZeroTrimmer.getStatus(RegexParser.parse(pattern).getChildren()), status); + } + private void assertTrimmedTo(String pattern, String expectedPattern) { Node actual = SimpleNumberEncoder.encode(parse(pattern)); actual = ExponentialBinAdder.addBins(actual); From c1735c410318dc077071d411202bfeb3e35d2ccc Mon Sep 17 00:00:00 2001 From: hlgp Date: Wed, 2 Oct 2024 00:04:00 +0000 Subject: [PATCH 2/8] remove unused method --- .../data/normalizer/regex/NumericRegexEncoder.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/regex/NumericRegexEncoder.java b/src/main/java/datawave/data/normalizer/regex/NumericRegexEncoder.java index 21e1ac2..4e0e885 100644 --- a/src/main/java/datawave/data/normalizer/regex/NumericRegexEncoder.java +++ b/src/main/java/datawave/data/normalizer/regex/NumericRegexEncoder.java @@ -321,16 +321,6 @@ private void encodePatternTree() { dedupe(); } - private void encodeWithoutTrimming() { - dedupe(); - encodeSimpleNumbers(); - // If there are no more unencoded sub-patterns in the tree after encoding simple numbers, no further work needs to be done. - if (!moreToEncode()) { - return; - } - addExponentialBins(); - } - /** * Parse the pattern to a node tree. */ From 8d1874204fd945c9b7228921b1d5a283bdcaa220 Mon Sep 17 00:00:00 2001 From: hlgp Date: Fri, 4 Oct 2024 14:33:26 +0000 Subject: [PATCH 3/8] implement a way to check if a normalized regex has lost information, or is lossy --- .../data/normalizer/AbstractGeometryNormalizer.java | 5 +++++ .../datawave/data/normalizer/AbstractNormalizer.java | 5 +++++ .../data/normalizer/LcNoDiacriticsNormalizer.java | 9 +++++++++ src/main/java/datawave/data/normalizer/Normalizer.java | 2 ++ .../java/datawave/data/normalizer/NumberNormalizer.java | 6 ++++-- 5 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java index 69c9c5e..665afd7 100644 --- a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java +++ b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java @@ -72,6 +72,11 @@ public String normalizeRegex(String fieldRegex) throws IllegalArgumentException throw new IllegalArgumentException("Cannot normalize a regex against a geometry field"); } + @Override + public boolean normalizedRegexIsLossy(String in) { + throw new IllegalArgumentException("Cannot normalize a regex against a geometry field"); + } + public String normalizeDelegateType(T geometry) { return getEncodedStringFromIndexBytes(getSingleIndexFromGeometry(geometry)); } diff --git a/src/main/java/datawave/data/normalizer/AbstractNormalizer.java b/src/main/java/datawave/data/normalizer/AbstractNormalizer.java index 6db6ed5..fcfb658 100644 --- a/src/main/java/datawave/data/normalizer/AbstractNormalizer.java +++ b/src/main/java/datawave/data/normalizer/AbstractNormalizer.java @@ -9,4 +9,9 @@ public abstract class AbstractNormalizer implements Normalizer { public Collection expand(String in) { return Collections.singletonList(normalize(in)); } + + @Override + public boolean normalizedRegexIsLossy(String in) { + return false; + } } diff --git a/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java b/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java index 707e725..7a66ddd 100644 --- a/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java +++ b/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java @@ -21,6 +21,7 @@ public class LcNoDiacriticsNormalizer extends AbstractNormalizer { private static final long serialVersionUID = -7922074256473963293L; private static final Pattern diacriticals = Pattern.compile("\\p{InCombiningDiacriticalMarks}"); + private static final Pattern capitals = Pattern.compile("[A-Z]"); public String normalize(String fieldValue) { if (null == fieldValue) { @@ -53,6 +54,14 @@ public String normalizeRegex(String fieldRegex) { } } + @Override + public boolean normalizedRegexIsLossy(String regex) { + Matcher diacriticMatcher = diacriticals.matcher(regex); + Matcher captialMatcher = capitals.matcher(regex); + + return (diacriticMatcher.matches() || captialMatcher.matches()); + } + @Override public String normalizeDelegateType(String delegateIn) { return normalize(delegateIn); diff --git a/src/main/java/datawave/data/normalizer/Normalizer.java b/src/main/java/datawave/data/normalizer/Normalizer.java index 4eab11a..6ef0aaf 100644 --- a/src/main/java/datawave/data/normalizer/Normalizer.java +++ b/src/main/java/datawave/data/normalizer/Normalizer.java @@ -36,5 +36,7 @@ public interface Normalizer extends Serializable { String normalizeRegex(String in); + boolean normalizedRegexIsLossy(String in); + Collection expand(String in); } diff --git a/src/main/java/datawave/data/normalizer/NumberNormalizer.java b/src/main/java/datawave/data/normalizer/NumberNormalizer.java index 0a75651..d1bb4ea 100644 --- a/src/main/java/datawave/data/normalizer/NumberNormalizer.java +++ b/src/main/java/datawave/data/normalizer/NumberNormalizer.java @@ -40,8 +40,10 @@ public String normalizeRegex(String fieldRegex) { } } - public ZeroRegexStatus getZeroRegexStatus(String untrimmedRegex) { - return NumericRegexEncoder.getZeroRegexStatus(untrimmedRegex); + public boolean normalizedRegexIsLossy(String untrimmedRegex) { + ZeroRegexStatus status = NumericRegexEncoder.getZeroRegexStatus(untrimmedRegex); + + return (status.equals(ZeroRegexStatus.LEADING) || status.equals(ZeroRegexStatus.TRAILING)); } @Override From 07da00a4a7f586cd90469334f2f69a1b392418f6 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Fri, 4 Oct 2024 19:10:26 +0000 Subject: [PATCH 4/8] Return false for lossy regex on Lc normalizers for now --- .../data/normalizer/LcNoDiacriticsNormalizer.java | 9 +++++---- src/main/java/datawave/data/normalizer/LcNormalizer.java | 9 +++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java b/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java index 7a66ddd..f7d3bca 100644 --- a/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java +++ b/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java @@ -56,10 +56,11 @@ public String normalizeRegex(String fieldRegex) { @Override public boolean normalizedRegexIsLossy(String regex) { - Matcher diacriticMatcher = diacriticals.matcher(regex); - Matcher captialMatcher = capitals.matcher(regex); - - return (diacriticMatcher.matches() || captialMatcher.matches()); + // Despite this normalizer actually being lossy, we are still + // returning false as users are used to overmathing when including + // diacritics or upper case letter. We may consider changing this + // down the road, but for not returning false. + return false; } @Override diff --git a/src/main/java/datawave/data/normalizer/LcNormalizer.java b/src/main/java/datawave/data/normalizer/LcNormalizer.java index c4c53b2..148ac2b 100644 --- a/src/main/java/datawave/data/normalizer/LcNormalizer.java +++ b/src/main/java/datawave/data/normalizer/LcNormalizer.java @@ -29,6 +29,15 @@ public String normalizeRegex(String fieldRegex) { } } + @Override + public boolean normalizedRegexIsLossy(String regex) { + // Despite this normalizer actually being lossy, we are still + // returning false as users are used to overmathing when including + // diacritics or upper case letter. We may consider changing this + // down the road, but for not returning false. + return false; + } + @Override public String normalizeDelegateType(String delegateIn) { return normalize(delegateIn); From 4b00e5387b3f0cb2cc937f1fd244d5d45c1b4105 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Fri, 4 Oct 2024 19:33:00 +0000 Subject: [PATCH 5/8] Extend the lossy regex method into the type --- src/main/java/datawave/data/type/BaseType.java | 5 +++++ src/main/java/datawave/data/type/Type.java | 2 ++ 2 files changed, 7 insertions(+) diff --git a/src/main/java/datawave/data/type/BaseType.java b/src/main/java/datawave/data/type/BaseType.java index f27238f..9ae9b5c 100644 --- a/src/main/java/datawave/data/type/BaseType.java +++ b/src/main/java/datawave/data/type/BaseType.java @@ -80,6 +80,11 @@ public String normalizeRegex(String in) { return normalizer.normalizeRegex(in); } + @Override + public boolean normalizedRegexIsLossy(String in) { + return normalizer.normalizedRegexIsLossy(in); + } + @Override public void normalizeAndSetNormalizedValue(T valueToNormalize) { setNormalizedValue(normalizer.normalizeDelegateType(valueToNormalize)); diff --git a/src/main/java/datawave/data/type/Type.java b/src/main/java/datawave/data/type/Type.java index ee0e717..9025a38 100644 --- a/src/main/java/datawave/data/type/Type.java +++ b/src/main/java/datawave/data/type/Type.java @@ -12,6 +12,8 @@ public interface Type> extends Comparable> { String normalizeRegex(String in); + boolean normalizedRegexIsLossy(String in); + Collection expand(String in); Collection expand(); From 1653964d4f810cc9e2dbdf2e539496bacdbbfd7b Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Mon, 7 Oct 2024 12:10:14 +0000 Subject: [PATCH 6/8] review comments --- .../datawave/data/normalizer/LcNoDiacriticsNormalizer.java | 5 ++--- src/main/java/datawave/data/normalizer/LcNormalizer.java | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java b/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java index f7d3bca..75660ef 100644 --- a/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java +++ b/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java @@ -21,8 +21,7 @@ public class LcNoDiacriticsNormalizer extends AbstractNormalizer { private static final long serialVersionUID = -7922074256473963293L; private static final Pattern diacriticals = Pattern.compile("\\p{InCombiningDiacriticalMarks}"); - private static final Pattern capitals = Pattern.compile("[A-Z]"); - + public String normalize(String fieldValue) { if (null == fieldValue) { return null; @@ -57,7 +56,7 @@ public String normalizeRegex(String fieldRegex) { @Override public boolean normalizedRegexIsLossy(String regex) { // Despite this normalizer actually being lossy, we are still - // returning false as users are used to overmathing when including + // returning false as users are used to overmatching when including // diacritics or upper case letter. We may consider changing this // down the road, but for not returning false. return false; diff --git a/src/main/java/datawave/data/normalizer/LcNormalizer.java b/src/main/java/datawave/data/normalizer/LcNormalizer.java index 148ac2b..2b5ddae 100644 --- a/src/main/java/datawave/data/normalizer/LcNormalizer.java +++ b/src/main/java/datawave/data/normalizer/LcNormalizer.java @@ -32,7 +32,7 @@ public String normalizeRegex(String fieldRegex) { @Override public boolean normalizedRegexIsLossy(String regex) { // Despite this normalizer actually being lossy, we are still - // returning false as users are used to overmathing when including + // returning false as users are used to overmatching when including // diacritics or upper case letter. We may consider changing this // down the road, but for not returning false. return false; From 917c261d9e1a521d2de76d719a4c570df42c29eb Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Mon, 7 Oct 2024 12:11:02 +0000 Subject: [PATCH 7/8] comment --- .../java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java | 2 +- src/main/java/datawave/data/normalizer/LcNormalizer.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java b/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java index 75660ef..1930395 100644 --- a/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java +++ b/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java @@ -58,7 +58,7 @@ public boolean normalizedRegexIsLossy(String regex) { // Despite this normalizer actually being lossy, we are still // returning false as users are used to overmatching when including // diacritics or upper case letter. We may consider changing this - // down the road, but for not returning false. + // down the road, but for now returning false. return false; } diff --git a/src/main/java/datawave/data/normalizer/LcNormalizer.java b/src/main/java/datawave/data/normalizer/LcNormalizer.java index 2b5ddae..ab678ec 100644 --- a/src/main/java/datawave/data/normalizer/LcNormalizer.java +++ b/src/main/java/datawave/data/normalizer/LcNormalizer.java @@ -34,7 +34,7 @@ public boolean normalizedRegexIsLossy(String regex) { // Despite this normalizer actually being lossy, we are still // returning false as users are used to overmatching when including // diacritics or upper case letter. We may consider changing this - // down the road, but for not returning false. + // down the road, but for now returning false. return false; } From 73dbd070f58dee082fd51f98164f0e34f8bfcbb4 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Mon, 7 Oct 2024 12:16:16 +0000 Subject: [PATCH 8/8] formatting --- .../java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java b/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java index 1930395..2dde04c 100644 --- a/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java +++ b/src/main/java/datawave/data/normalizer/LcNoDiacriticsNormalizer.java @@ -21,7 +21,7 @@ public class LcNoDiacriticsNormalizer extends AbstractNormalizer { private static final long serialVersionUID = -7922074256473963293L; private static final Pattern diacriticals = Pattern.compile("\\p{InCombiningDiacriticalMarks}"); - + public String normalize(String fieldValue) { if (null == fieldValue) { return null;