From cfcc9a9bba2baf1c91a16fff6303d73fe47e61bc Mon Sep 17 00:00:00 2001 From: Stephen Crawford <65832608+scrawfor99@users.noreply.github.com> Date: Thu, 9 Nov 2023 18:22:58 -0500 Subject: [PATCH] Manually backport #11060 (simple regex normalization refactor) (#11148) * Manually backport 11060 Signed-off-by: Stephen Crawford * spotless Signed-off-by: Stephen Crawford --------- Signed-off-by: Stephen Crawford --- CHANGELOG.md | 2 + .../org/opensearch/common/regex/Regex.java | 54 +++++++++---------- .../opensearch/common/regex/RegexTests.java | 17 ++++++ 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56fb5aacd00ee..6c9c28f3a7758 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Bump `netty` from 4.1.96.Final to 4.1.97.Final ([#9553](https://github.com/opensearch-project/OpenSearch/pull/9553)) ### Changed +- Use iterative approach to evaluate Regex.simpleMatch ([#11060](https://github.com/opensearch-project/OpenSearch/pull/11060)) + ### Deprecated ### Removed ### Fixed diff --git a/server/src/main/java/org/opensearch/common/regex/Regex.java b/server/src/main/java/org/opensearch/common/regex/Regex.java index dc42b09848e36..4b0ccc39f2db2 100644 --- a/server/src/main/java/org/opensearch/common/regex/Regex.java +++ b/server/src/main/java/org/opensearch/common/regex/Regex.java @@ -124,35 +124,35 @@ public static boolean simpleMatch(String pattern, String str, boolean caseInsens } private static boolean simpleMatchWithNormalizedStrings(String pattern, String str) { - final int firstIndex = pattern.indexOf('*'); - if (firstIndex == -1) { - return pattern.equals(str); - } - if (firstIndex == 0) { - if (pattern.length() == 1) { - return true; - } - final int nextIndex = pattern.indexOf('*', firstIndex + 1); - if (nextIndex == -1) { - // str.endsWith(pattern.substring(1)), but avoiding the construction of pattern.substring(1): - return 
str.regionMatches(str.length() - pattern.length() + 1, pattern, 1, pattern.length() - 1); - } else if (nextIndex == 1) { - // Double wildcard "**" - skipping the first "*" - return simpleMatchWithNormalizedStrings(pattern.substring(1), str); - } - final String part = pattern.substring(1, nextIndex); - int partIndex = str.indexOf(part); - while (partIndex != -1) { - if (simpleMatchWithNormalizedStrings(pattern.substring(nextIndex), str.substring(partIndex + part.length()))) { - return true; - } - partIndex = str.indexOf(part, partIndex + 1); + int sIdx = 0, pIdx = 0, match = 0, wildcardIdx = -1; + while (sIdx < str.length()) { + // wildcard found, only incrementing pattern pointer; checked first so a literal '*' in str cannot consume it + if (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') { + wildcardIdx = pIdx; + match = sIdx; + pIdx++; + } else if (pIdx < pattern.length() && str.charAt(sIdx) == pattern.charAt(pIdx)) { + // both chars matching, incrementing both pointers + sIdx++; + pIdx++; + } else if (wildcardIdx != -1) { + // mismatch after an earlier wildcard: backtrack so that wildcard absorbs one more character of str + pIdx = wildcardIdx + 1; + match++; + sIdx = match; + } else { + // current pattern pointer is not a wildcard, last pattern pointer was also not a wildcard + // characters do not match + return false; } - return false; } - return str.regionMatches(0, pattern, 0, firstIndex) - && (firstIndex == pattern.length() - 1 // only wildcard in pattern is at the end, so no need to look at the rest of the string - || simpleMatchWithNormalizedStrings(pattern.substring(firstIndex), str.substring(firstIndex))); + + // str is exhausted: skip trailing wildcards, which match the empty remainder + while (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') { + pIdx++; + } + + return pIdx == pattern.length(); } /** diff --git a/server/src/test/java/org/opensearch/common/regex/RegexTests.java b/server/src/test/java/org/opensearch/common/regex/RegexTests.java index b92fcdad56d74..4a3772b77fde7 100644 --- a/server/src/test/java/org/opensearch/common/regex/RegexTests.java +++ 
b/server/src/test/java/org/opensearch/common/regex/RegexTests.java @@ -97,6 +97,23 @@ public void testDoubleWildcardMatch() { assertTrue(Regex.simpleMatch("fff*******ddd", "fffabcddd")); assertTrue(Regex.simpleMatch("fff*******ddd", "FffAbcdDd", true)); assertFalse(Regex.simpleMatch("fff******ddd", "fffabcdd")); + assertFalse(Regex.simpleMatch("fff*******ddd", "FffAbcdDd", false)); + assertTrue(Regex.simpleMatch("abCDefGH******ddd", "abCDefGHddd", false)); + assertTrue(Regex.simpleMatch("******", "a")); + assertTrue(Regex.simpleMatch("***WILDcard***", "aaaaaaaaWILDcardZZZZZZ", false)); + assertFalse(Regex.simpleMatch("***xxxxx123456789xxxxxx***", "xxxxxabcdxxxxx", false)); + assertFalse(Regex.simpleMatch("***xxxxxabcdxxxxx***", "xxxxxABCDxxxxx", false)); + assertTrue(Regex.simpleMatch("***xxxxxabcdxxxxx***", "xxxxxABCDxxxxx", true)); + assertTrue(Regex.simpleMatch("**stephenIsSuperCool**", "ItIsTrueThatStephenIsSuperCoolSoYouShouldLetThisIn", true)); + assertTrue( + Regex.simpleMatch( + "**w**X**y**Z**", + "abcdeFGHIJKLMNOPqrstuvwabcdeFGHIJKLMNOPqrstuvwXabcdeFGHIJKLMNOPqrstuvwXyabcdeFGHIJKLMNOPqrstuvwXyZ", + false + ) + ); + assertTrue(Regex.simpleMatch("*a", "*ba")); + assertTrue(Regex.simpleMatch("*", "*a")); } public void testSimpleMatch() {