Skip to content

Commit

Permalink
Merged PR 2495892: Avoid negative lookbehind in 920120
Browse files Browse the repository at this point in the history
Same as SpiderLabs/owasp-modsecurity-crs#1663. I doubt they will accept the PR in their repo, as the downsides seem to outweigh the benefits for them.

PR URL: https://msazure.visualstudio.com/DefaultCollection/One/_git/Networking-Azwaf/pullrequest/2495892

Related work items: #5880651
  • Loading branch information
allanrbo committed Jan 27, 2020
1 parent e73318a commit 5bc3313
Show file tree
Hide file tree
Showing 8 changed files with 1,194 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,12 @@ SecRule REQUEST_LINE "!@rx ^(?i:(?:[a-z]{3,10}\s+(?:\w{3,7}?://[\w\-\./]*(?::\d+
# These rules check for the existence of the ' " ; = meta-characters in
# either the file or file name variables.
# HTML entities may lead to false positives, which is why they are allowed on PL1.
# Negative look behind assertions allow frequently used entities &_;
# Frequently used HTML entities such as &auml; are allowed.
#
# To be compatible with non-PCRE regex engines, negative lookbehinds are
# avoided. Instead the script in util/regexp-negativelookbehind was used to
# generate an alternative equivalent regex:
# ./negativelookbehind.py negativelookbehind-920120.data
#
# -=[ Targets, characters and html entities ]=-
#
Expand All @@ -94,7 +99,7 @@ SecRule REQUEST_LINE "!@rx ^(?i:(?:[a-z]{3,10}\s+(?:\w{3,7}?://[\w\-\./]*(?::\d+
# https://www.owasp.org/index.php/ModSecurity_CRS_RuleID-960000
# http://www.ietf.org/rfc/rfc2183.txt
#
SecRule FILES_NAMES|FILES "@rx (?<!&(?:[aAoOuUyY]uml)|&(?:[aAeEiIoOuU]circ)|&(?:[eEiIoOuUyY]acute)|&(?:[aAeEiIoOuU]grave)|&(?:[cC]cedil)|&(?:[aAnNoO]tilde)|&(?:amp)|&(?:apos));|['\"=]" \
SecRule FILES_NAMES|FILES "@rx (?:(?:^|[^lceps])|(?:^|[^mi])l|(?:^|[^r])c|(?:^|[^tvd])e|(?:^|[^m])p|(?:^|[^o])s|(?:^|[^u])ml|(?:^|[^i])rc|(?:^|[^u])te|(?:^|[^a])ve|(?:^|[^d])il|(?:^|[^l])de|(?:^|[^a])mp|(?:^|[^p])os|(?:^|[^aAoOuUyY])uml|(?:^|[^c])irc|(?:^|[^c])ute|(?:^|[^r])ave|(?:^|[^e])dil|(?:^|[^i])lde|(?:^|[^&])amp|(?:^|[^a])pos|(?:^|[^&])[aAoOuUyY]uml|(?:^|[^aAeEiIoOuU])circ|(?:^|[^a])cute|(?:^|[^g])rave|(?:^|[^c])edil|(?:^|[^t])ilde|(?:^|[^&])apos|(?:^|[^&])[aAeEiIoOuU]circ|(?:^|[^eEiIoOuUyY])acute|(?:^|[^aAeEiIoOuU])grave|(?:^|[^cC])cedil|(?:^|[^aAnNoO])tilde|(?:^|[^&])[eEiIoOuUyY]acute|(?:^|[^&])[aAeEiIoOuU]grave|(?:^|[^&])[cC]cedil|(?:^|[^&])[aAnNoO]tilde);|['\"=]" \
"id:920120,\
phase:2,\
block,\
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
&[aAoOuUyY]uml
&[aAeEiIoOuU]circ
&[eEiIoOuUyY]acute
&[aAeEiIoOuU]grave
&[cC]cedil
&[aAnNoO]tilde
&amp
&apos
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/usr/bin/env python

import fileinput

#
# This script generates regular expressions that behave like negative lookbehinds without using negative lookbehinds.
# For example an alternative to "(?<!a[bB]c|1234)" would be "(?:(?:^|[^c4])|(?:^|[^bB])c|(?:^|[^3])4|(?:^|[^a])[bB]c|(?:^|[^2])34|(?:^|[^1])234)".
# More explanation here: http://allanrbo.blogspot.com/2020/01/alternative-to-negative-lookbehinds-in.html
#
# Input (stdin or arg): a file where each line corresponds to an alternative-group in a negative lookbehind.
# Example to generate a regex equivalent to "(?<!a[bB]c|1234)":
# a[bB]c
# 1234
# Output: A regular expression corresponding to the negative lookbehind.
#


# Gather the non-empty input lines (from the files named on the command line,
# or from standard input when none are given), with trailing whitespace removed.
negativePrefixes = [
    stripped
    for stripped in (raw.rstrip() for raw in fileinput.input())
    if stripped != ""
]

def removeDuplicateChars(s):
    """Return s with repeated characters dropped.

    The first occurrence of every character is kept, in its original order.
    """
    kept = []
    for ch in s:
        if ch not in kept:
            kept.append(ch)
    return "".join(kept)

def removeChars(s, charsToRemove):
    """Return s without any character that occurs in charsToRemove."""
    survivors = []
    for ch in s:
        if ch in charsToRemove:
            continue
        survivors.append(ch)
    return "".join(survivors)

def _splitPrefix(prefix):
    """Split one input line into a list of strings.

    Each returned string is either a single literal character, or the
    de-duplicated members of a "[...]" char class (without the brackets).

    Raises:
        ValueError: if a char class contains "-" or a backslash (ranges and
            escapes are not supported), if a "[" is never closed, or if a "]"
            appears without a preceding "[".
    """
    parts = []
    curCc = ""
    inCc = False
    for c in prefix:
        if c == "[":
            inCc = True
        elif c == "]":
            if not inCc:
                # A stray "]" would otherwise add an empty element, which
                # later renders as an invalid "[^]" in the generated regex.
                raise ValueError("Unexpected ']' without a matching '[' in: " + prefix)
            parts.append(removeDuplicateChars(curCc))
            curCc = ""
            inCc = False
        elif inCc:
            if c in "-\\":
                # Raising a plain string is itself a TypeError; raise a real
                # exception so the message actually reaches the user.
                raise ValueError("Only really simply char classes are currently supported. No ranges or escapes, sorry.")
            curCc += c
        else:
            parts.append(c)
    if inCc:
        # Without this check the unfinished class would be dropped silently.
        raise ValueError("Unterminated char class in: " + prefix)
    return parts


# Split into arrays of strings. Each string is either a single char, or a char class.
negativePrefixesSplit = [_splitPrefix(np) for np in negativePrefixes]

# Collects the generated expressions for every suffix length; the output
# section at the end of the script renders this list.
allexprs = []


class Expr:
    """Bare attribute holder for one alternative of the generated regex.

    Instances start empty; the generation loop sets ``curChar`` and
    ``remainder`` on them after construction.
    """

# Build the alternatives one suffix length at a time. Iteration n works on the
# last n+1 elements of every prefix that has at least that many elements, and
# appends the resulting expressions to allexprs.
suffixLength = 0
while True:
    # Trailing slice (suffixLength+1 elements) of each sufficiently long prefix.
    suffixes = []
    for np in negativePrefixesSplit:
        if suffixLength < len(np):
            suffixes.append(np[len(np)-suffixLength-1:])

    # No prefix is long enough any more: every suffix length has been handled.
    if len(suffixes) == 0:
        break

    # Split each suffix into its first element (curChar) and the rest
    # (remainder). When the regex is printed, curChar ends up inside "[^...]"
    # and the remainder is emitted after it. suffix[1:] is a fresh list, so
    # later edits to a remainder do not touch negativePrefixesSplit.
    exprs = []
    for suffix in suffixes:
        curChar = suffix[0]
        remainder = suffix[1:]
        expr = Expr()
        expr.curChar = curChar
        expr.remainder = remainder
        exprs.append(expr)

    # For every ordered pair (e1, e2), including e1 paired with itself: is each
    # element of e1's remainder a subset of the element at the same position
    # in e2's remainder?
    for i in range(len(exprs)):
        e1 = exprs[i]
        for j in range(len(exprs)):
            e2 = exprs[j]
            isSubset = True
            for k in range(len(e1.remainder)):
                if not set(e1.remainder[k]).issubset(set(e2.remainder[k])):
                    isSubset = False
                    break
            if isSubset:
                if e1.curChar == e2.curChar:
                    # Same leading chars: e1 takes over e2's remainder. This
                    # shares the list object (no copy), so later in-place
                    # edits of e2.remainder are visible through e1 as well.
                    e1.remainder = e2.remainder
                    continue

                # Different leading chars: fold e2's chars into e1's curChar.
                e1.curChar += e2.curChar
                e1.curChar = removeDuplicateChars(e1.curChar)
                # Wherever e2's remainder element has chars that e1's lacks,
                # strip the chars they have in common from e2's element.
                for k in range(len(e1.remainder)):
                    if len(set(e2.remainder[k]) - set(e1.remainder[k])) > 0:
                        charsInCommon = "".join(set(e2.remainder[k]) & set(e1.remainder[k]))
                        e2.remainder[k] = removeChars(e2.remainder[k], charsInCommon)

    # Remove duplicate expressions. Each expression is compared only with the
    # ones before it (the inner loop stops at i == j), so the first occurrence
    # is the one kept. Comparison is by character sets, so the order of chars
    # inside curChar or a remainder element does not matter.
    exprsFiltered = []
    for i in range(len(exprs)):
        e1 = exprs[i]
        alreadyExists = False
        for j in range(len(exprs)):
            if i == j:
                break

            e2 = exprs[j]

            sameC = set(e1.curChar) == set(e2.curChar)
            sameR = True
            for k in range(len(e1.remainder)):
                if set(e1.remainder[k]) != set(e2.remainder[k]):
                    sameR = False
                    break
            if sameC and sameR:
                alreadyExists = True
                break

        if not alreadyExists:
            exprsFiltered.append(e1)

    allexprs.extend(exprsFiltered)

    # Move on to suffixes that are one element longer. The trailing "continue"
    # is the last statement of the loop body, so it has no effect of its own.
    suffixLength += 1
    continue

# Render each expression as "(?:^|[^<curChar>])" followed by its remainder.
# Remainder elements longer than one character are char classes and get
# wrapped in brackets; all other elements are emitted as they are.
renderedAlternatives = []
for item in allexprs:
    pieces = ["(?:^|[^" + item.curChar + "])"]
    for element in item.remainder:
        pieces.append("[" + element + "]" if len(element) > 1 else element)
    renderedAlternatives.append("".join(pieces))

# One alternative per line, separated by "|". Every alternative, including the
# last one, is followed by a newline before the closing parenthesis.
body = "|\n".join(renderedAlternatives)
if renderedAlternatives:
    body += "\n"
out = "(?:\n" + body + ")"

print("Human readable:")
print(out)
print()
print("Single line:")
print(out.replace("\n", ""))




Loading

0 comments on commit 5bc3313

Please sign in to comment.