From c1ffbc4fe84900f8ac4740c321b343b79bdf32d1 Mon Sep 17 00:00:00 2001
From: Craig Perkins
Date: Wed, 1 Nov 2023 20:05:06 -0400
Subject: [PATCH] Remove adjacent duplicates to optimize regex before processing

Signed-off-by: Craig Perkins
---
 .../org/opensearch/common/regex/Regex.java | 22 ++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/server/src/main/java/org/opensearch/common/regex/Regex.java b/server/src/main/java/org/opensearch/common/regex/Regex.java
index 396af77c8a751..fffe5f6a70057 100644
--- a/server/src/main/java/org/opensearch/common/regex/Regex.java
+++ b/server/src/main/java/org/opensearch/common/regex/Regex.java
@@ -93,6 +93,25 @@ public static Automaton simpleMatchToAutomaton(String... patterns) {
         return Operations.union(automata);
     }
 
+    /**
+     * Collapses each run of adjacent {@code target} characters in {@code str} into one occurrence.
+     * @param str - The input string to remove adjacent duplicate characters from; must not be null
+     * @param target - The target character to remove duplicates of
+     * @return the input with every run of {@code target} reduced to a single character
+     */
+    public static String removeDuplicates(String str, char target) {
+        StringBuilder sb = new StringBuilder();
+        for (char c : str.toCharArray()) {
+            int size = sb.length();
+            if (size > 0 && c == target && sb.charAt(size - 1) == c) {
+                // Keep the first target of the run and skip repeats; deleting it would erase even-length runs.
+                continue;
+            }
+            sb.append(c);
+        }
+        return sb.toString();
+    }
+
     /**
      * Match a String against the given pattern, supporting the following simple
      * pattern styles: "xxx*", "*xxx", "*xxx*" and "xxx*yyy" matches (with an
@@ -104,7 +123,8 @@ public static Automaton simpleMatchToAutomaton(String... patterns) {
      * @return whether the String matches the given pattern
      */
     public static boolean simpleMatch(String pattern, String str) {
-        return simpleMatch(pattern, str, false);
+        String trimmedPattern = pattern == null ? null : removeDuplicates(pattern, '*');
+        return simpleMatch(trimmedPattern, str, false);
     }
 
     /**