forked from intel/hyperscan
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merged PR 2495892: Avoid negative lookbehind in 920120
Same as SpiderLabs/owasp-modsecurity-crs#1663 . I doubt they will accept the PR in their repo as the downsides seem to outweigh the benefits for them. PR URL: https://msazure.visualstudio.com/DefaultCollection/One/_git/Networking-Azwaf/pullrequest/2495892 Related work items: #5880651
- Loading branch information
Showing
8 changed files
with
1,194 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 8 additions & 0 deletions
8
secrule/rulesetfiles/crs3.1/util/regexp-negativelookbehind/negativelookbehind-920120.data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
&[aAoOuUyY]uml | ||
&[aAeEiIoOuU]circ | ||
&[eEiIoOuUyY]acute | ||
&[aAeEiIoOuU]grave | ||
&[cC]cedil | ||
&[aAnNoO]tilde | ||
& | ||
&apos |
150 changes: 150 additions & 0 deletions
150
secrule/rulesetfiles/crs3.1/util/regexp-negativelookbehind/negativelookbehind.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
#!/usr/bin/env python | ||
|
||
import fileinput | ||
|
||
# | ||
# This script generates regular expressions that behave like negative lookbehinds without using negative lookbehinds. | ||
# For example an alternative to "(?<!a[bB]c|1234)" would be "(?:(?:^|[^c4])|(?:^|[^bB])c|(?:^|[^3])4|(?:^|[^a])[bB]c|(?:^|[^2])34|(?:^|[^1])234)". | ||
# More explanation here: http://allanrbo.blogspot.com/2020/01/alternative-to-negative-lookbehinds-in.html | ||
# | ||
# Input (stdin or arg): a file where each line corresponds to an alternative-group in a negative lookbehind. | ||
# Example to generate a regex equivalent to "(?<!a[bB]c|1234)": | ||
# a[bB]c | ||
# 1234 | ||
# Output: A regular expression corresponding to the negative lookbehind. | ||
# | ||
|
||
|
||
# Process lines from input file, or if not specified, standard input.
# Each non-blank line is one alternative of the negative lookbehind; trailing
# whitespace (including the newline) is stripped, blank lines are ignored.
negativePrefixes = []
# The context manager closes the input stream once all lines have been read.
with fileinput.input() as inputLines:
    for line in inputLines:
        line = line.rstrip()
        if line:
            negativePrefixes.append(line)
|
||
def removeDuplicateChars(s):
    """Return s with repeated characters dropped.

    The first occurrence of every character is kept and the original order
    is preserved, e.g. "aAbBaA" becomes "aAbB".
    """
    # Track what has been emitted so each character costs one set lookup
    # instead of a scan over the whole preceding slice.
    seen = set()
    kept = []
    for c in s:
        if c not in seen:
            seen.add(c)
            kept.append(c)
    return "".join(kept)
|
||
def removeChars(s, charsToRemove):
    """Return s without any character that occurs in charsToRemove.

    The relative order of the remaining characters is unchanged.
    """
    return "".join(c for c in s if c not in charsToRemove)
|
||
# Split into arrays of strings. Each string is either a single char, or a char class.
# A char class "[bB]" is stored as its de-duplicated contents without the
# brackets ("bB"); every character outside a class becomes its own element.
negativePrefixesSplit = []
for np in negativePrefixes:
    npSplit = []
    curCc = ""      # contents of the char class currently being read
    inCc = False    # True while between "[" and the matching "]"
    for c in np:
        if c == "[":
            inCc = True
        elif c == "]":
            npSplit.append(removeDuplicateChars(curCc))
            curCc = ""
            inCc = False
        elif inCc:
            if c in "-\\":
                # Ranges and escapes inside a class are not understood by the
                # generator below, so refuse them instead of emitting a wrong regex.
                raise ValueError("Only really simply char classes are currently supported. No ranges or escapes, sorry.")
            curCc += c
        else:
            npSplit.append(c)
    negativePrefixesSplit.append(npSplit)
|
||
# Collects the alternatives of the final regular expression.
allexprs = list()


class Expr:
    """Bare attribute container describing one alternative.

    Instances are created empty; the code that builds them assigns
    `curChar` and `remainder` afterwards.
    """
|
||
# Generate the alternatives one suffix length at a time. For every negative
# prefix, its last suffixLength+1 elements are split into a first element
# (curChar: the characters that must NOT precede the rest) and the remaining
# elements (remainder: emitted literally after the "(?:^|[^...])" guard).
suffixLength = 0
while True:
    # Suffixes of the current length, for every prefix long enough to have one.
    suffixes = [
        np[len(np) - suffixLength - 1:]
        for np in negativePrefixesSplit
        if suffixLength < len(np)
    ]
    if not suffixes:
        # Every prefix has been consumed completely.
        break

    exprs = []
    for suffix in suffixes:
        expr = Expr()
        expr.curChar = suffix[0]
        expr.remainder = suffix[1:]
        exprs.append(expr)

    # Is the remainder a subset of any other suffixes remainders?
    # All remainders in this round have the same length, so they can be
    # compared position by position.
    for e1 in exprs:
        for e2 in exprs:
            isSubset = all(
                set(own) <= set(other)
                for own, other in zip(e1.remainder, e2.remainder)
            )
            if not isSubset:
                continue

            if e1.curChar == e2.curChar:
                # Same excluded characters: adopt the wider remainder. This
                # deliberately shares the list object with e2.
                e1.remainder = e2.remainder
                continue

            # Everything e1 matches after the guard is also matched by e2, so
            # e1 must exclude e2's leading characters as well.
            e1.curChar = removeDuplicateChars(e1.curChar + e2.curChar)
            for k, (own, other) in enumerate(zip(e1.remainder, e2.remainder)):
                if set(other) - set(own):
                    # e2 is strictly wider at this position: drop the part
                    # that e1 now covers so the two do not overlap.
                    charsInCommon = "".join(set(other) & set(own))
                    e2.remainder[k] = removeChars(other, charsInCommon)

    # Remove duplicate expressions: keep an expression only if no earlier one
    # has the same excluded characters and the same remainder.
    exprsFiltered = []
    for i, e1 in enumerate(exprs):
        alreadyExists = any(
            set(e1.curChar) == set(e2.curChar)
            and all(
                set(own) == set(other)
                for own, other in zip(e1.remainder, e2.remainder)
            )
            for e2 in exprs[:i]
        )
        if not alreadyExists:
            exprsFiltered.append(e1)

    allexprs.extend(exprsFiltered)

    suffixLength += 1
|
||
# Render every expression as "(?:^|[^<curChar>])<remainder>", where remainder
# elements longer than one character are written back as a char class.
# NOTE(review): curChar and remainder characters are emitted verbatim, with no
# regex escaping; confirm the inputs never contain characters that are special
# inside a char class.
alternatives = []
for e in allexprs:
    tail = "".join("[" + c + "]" if len(c) > 1 else c for c in e.remainder)
    alternatives.append("(?:^|[^" + e.curChar + "])" + tail)

# One alternative per line, separated by "|"; the newlines only serve the
# human-readable form and are stripped for the single-line form.
out = "(?:\n"
if alternatives:
    out += "|\n".join(alternatives) + "\n"
out += ")"

print("Human readable:")
print(out)
print()
print("Single line:")
print(out.replace("\n", ""))
|
||
|
||
|
||
|
Oops, something went wrong.