Skip to content

Commit

Permalink
grok validate pattern - change to iterative approach
Browse files Browse the repository at this point in the history
  • Loading branch information
sandeshkr419 committed Jun 12, 2024
1 parent 2f8cb07 commit 709b113
Showing 1 changed file with 117 additions and 38 deletions.
155 changes: 117 additions & 38 deletions libs/grok/src/main/java/org/opensearch/grok/Grok.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,14 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.function.Consumer;

Expand Down Expand Up @@ -138,51 +141,127 @@ private Grok(
this.captureConfig = unmodifiableList(captureConfig);
}

/**
* Entry point to recursively validate the pattern bank for circular dependencies and malformed pattern
* definitions via depth-first traversal. This implementation does not include memoization.
*/
private void validatePatternBank() {
// /**
// * Entry point to recursively validate the pattern bank for circular dependencies and malformed URLs
// * via depth-first traversal. This implementation does not include memoization.
// */
// private void validatePatternBank() {
// for (String patternName : patternBank.keySet()) {
// validatePatternBank(patternName, new Stack<>());
// }
// }
//
// /**
// * Checks whether patterns reference each other in a circular manner and, if so, fail with an exception.
// * Also checks for malformed pattern definitions and fails with an exception.
// * <p>
// * In a pattern, anything between <code>%{</code> and <code>}</code> or <code>:</code> is considered
// * a reference to another named pattern. This method will navigate to all these named patterns and
// * check for a circular reference.
// */
// private void validatePatternBank(String patternName, Stack<String> path) {
// String pattern = patternBank.get(patternName);
// boolean isSelfReference = pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":");
// if (isSelfReference) {
// throwExceptionForCircularReference(patternName, pattern);
// } else if (path.contains(patternName)) {
// // current pattern name is already in the path, fetch its predecessor
// String prevPatternName = path.pop();
// String prevPattern = patternBank.get(prevPatternName);
// throwExceptionForCircularReference(prevPatternName, prevPattern, patternName, path);
// }
// path.push(patternName);
// for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) {
// int begin = i + 2;
// int syntaxEndIndex = pattern.indexOf('}', begin);
// if (syntaxEndIndex == -1) {
// throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]");
// }
// int semanticNameIndex = pattern.indexOf(':', begin);
// int end = syntaxEndIndex;
// if (semanticNameIndex != -1) {
// end = Math.min(syntaxEndIndex, semanticNameIndex);
// }
// String dependsOnPattern = pattern.substring(begin, end);
// validatePatternBank(dependsOnPattern, path);
// }
// path.pop();
// }

// private Map<String, String> patternBank = new HashMap<>();

/**
 * Entry point to validate the pattern bank for circular dependencies and malformed pattern
 * definitions. Every entry of the bank is used as a starting point and checked with the
 * iterative (explicit-stack) validator, so validation does not recurse once per reference.
 */
public void validatePatternBank() {
    for (String patternName : patternBank.keySet()) {
        // Only the iterative validator is invoked; the former recursive call was redundant
        // with it and reintroduced one call-stack frame per nested reference.
        validatePatternBankIterative(patternName);
    }
}

/**
* Checks whether patterns reference each other in a circular manner and, if so, fail with an exception.
* Also checks for malformed pattern definitions and fails with an exception.
* <p>
* In a pattern, anything between <code>%{</code> and <code>}</code> or <code>:</code> is considered
* a reference to another named pattern. This method will navigate to all these named patterns and
* check for a circular reference.
*/
private void validatePatternBank(String patternName, Stack<String> path) {
String pattern = patternBank.get(patternName);
boolean isSelfReference = pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":");
if (isSelfReference) {
throwExceptionForCircularReference(patternName, pattern);
} else if (path.contains(patternName)) {
// current pattern name is already in the path, fetch its predecessor
String prevPatternName = path.pop();
String prevPattern = patternBank.get(prevPatternName);
throwExceptionForCircularReference(prevPatternName, prevPattern, patternName, path);
}
path.push(patternName);
for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) {
int begin = i + 2;
int syntaxEndIndex = pattern.indexOf('}', begin);
if (syntaxEndIndex == -1) {
throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]");
private void validatePatternBankIterative(String patternName) {
Stack<String> path = new Stack<>();
Stack<PatternState> stack = new Stack<>();
Set<String> visited = new HashSet<>();

stack.push(new PatternState(patternName, 0));

while (!stack.isEmpty()) {
PatternState currentState = stack.pop();
String currentPatternName = currentState.patternName;
int startIndex = currentState.startIndex;

if (path.contains(currentPatternName)) {
// Current pattern name is already in the path, indicating a circular reference.
String prevPatternName = path.pop();
String prevPattern = patternBank.get(prevPatternName);
throwExceptionForCircularReference(prevPatternName, prevPattern, currentPatternName, path);
}
int semanticNameIndex = pattern.indexOf(':', begin);
int end = syntaxEndIndex;
if (semanticNameIndex != -1) {
end = Math.min(syntaxEndIndex, semanticNameIndex);

path.push(currentPatternName);

String pattern = patternBank.get(currentPatternName);
if (pattern.contains("%{" + currentPatternName + "}") || pattern.contains("%{" + currentPatternName + ":")) {
throwExceptionForCircularReference(currentPatternName, pattern);
}
String dependsOnPattern = pattern.substring(begin, end);
validatePatternBank(dependsOnPattern, path);

boolean hasDependencies = false;
for (int i = startIndex; i < pattern.length(); i = pattern.indexOf("%{", i + 1)) {
if (i == -1) {
break;
}
int begin = i + 2;
int syntaxEndIndex = pattern.indexOf('}', begin);
if (syntaxEndIndex == -1) {
throw new IllegalArgumentException("Malformed pattern [" + currentPatternName + "][" + pattern + "]");
}
int semanticNameIndex = pattern.indexOf(':', begin);
int end = syntaxEndIndex;
if (semanticNameIndex != -1) {
end = Math.min(syntaxEndIndex, semanticNameIndex);
}
String dependsOnPattern = pattern.substring(begin, end);

if (!visited.contains(dependsOnPattern)) {
stack.push(new PatternState(currentPatternName, i + 1));
stack.push(new PatternState(dependsOnPattern, 0));
hasDependencies = true;
break;
}
}

if (!hasDependencies) {
visited.add(currentPatternName);
path.pop();
}
}
}

private class PatternState {
String patternName;
int startIndex;

PatternState(String patternName, int startIndex) {
this.patternName = patternName;
this.startIndex = startIndex;
}
path.pop();
}

private static void throwExceptionForCircularReference(String patternName, String pattern) {
Expand Down

0 comments on commit 709b113

Please sign in to comment.