From 1c0273b82e50fa58647138861f1f288282dd9e8e Mon Sep 17 00:00:00 2001 From: Thomas Patzke Date: Sat, 12 Oct 2024 23:17:09 +0200 Subject: [PATCH 1/2] SigmaString to regex conversion --- sigma/types.py | 11 +++++++++++ tests/test_types.py | 13 +++++++++++++ 2 files changed, 24 insertions(+) diff --git a/sigma/types.py b/sigma/types.py index 478976a..6fd120b 100644 --- a/sigma/types.py +++ b/sigma/types.py @@ -565,6 +565,17 @@ def convert( ) return s + def to_regex(self) -> "SigmaRegularExpression": + """Convert SigmaString into a regular expression.""" + return SigmaRegularExpression( + self.convert( + escape_char="\\", + wildcard_multi=".*", + wildcard_single=".", + add_escaped=".*+?^$[](){}\\|", + ) + ) + class SigmaCasedString(SigmaString): """Case-sensitive string matching.""" diff --git a/tests/test_types.py b/tests/test_types.py index acf19e7..0c34c9b 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -388,6 +388,19 @@ def test_strings_convert_invalid_part(): s.convert() +def test_strings_to_regex(): + s = SigmaString("Test*Special?(Plain)[\\*\\?]") + assert s.s == ( + "Test", + SpecialChars.WILDCARD_MULTI, + "Special", + SpecialChars.WILDCARD_SINGLE, + "(Plain)[*?]", + ) + r = s.to_regex() + assert r.regexp == "Test.*Special.\\(Plain\\)\\[\\*\\?\\]" + + def test_string_index(sigma_string): assert sigma_string[3] == SigmaString("s") From 903b8a24774106ebd3967fd96b4754dd406f7fda Mon Sep 17 00:00:00 2001 From: Thomas Patzke Date: Sun, 13 Oct 2024 00:19:37 +0200 Subject: [PATCH 2/2] Value as regex in backend query template with {regex} --- sigma/conversion/base.py | 12 ++++-- tests/test_conversion_base.py | 69 +++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/sigma/conversion/base.py b/sigma/conversion/base.py index b30038c..8fc9a7b 100644 --- a/sigma/conversion/base.py +++ b/sigma/conversion/base.py @@ -807,7 +807,7 @@ class variables. 
If this is not sufficient, the respective methods can be implem endswith_expression: ClassVar[Optional[str]] = None contains_expression: ClassVar[Optional[str]] = None wildcard_match_expression: ClassVar[Optional[str]] = ( - None # Special expression if wildcards can't be matched with the eq_token operator + None # Special expression if wildcards can't be matched with the eq_token operator. The {regex} placeholder contains the value as a regular expression. ) # Regular expressions @@ -831,6 +831,7 @@ class variables. If this is not sufficient, the respective methods can be implem # Case sensitive string matching expression. String is quoted/escaped like a normal string. # Placeholders {field} and {value} are replaced with field name and quoted/escaped string. + # {regex} contains the value expressed as a regular expression. case_sensitive_match_expression: ClassVar[Optional[str]] = None # Case sensitive string matching operators similar to standard string matching. If not provided, # case_sensitive_match_expression is used. @@ -888,10 +889,10 @@ class variables. 
If this is not sufficient, the respective methods can be implem # Value not bound to a field unbound_value_str_expression: ClassVar[Optional[str]] = ( - None # Expression for string value not bound to a field as format string with placeholder {value} + None # Expression for string value not bound to a field as format string with placeholders {value} and {regex} (value as a regular expression) ) unbound_value_num_expression: ClassVar[Optional[str]] = ( - None # Expression for number value not bound to a field as format string with placeholder {value} + None # Expression for number value not bound to a field as format string with placeholder {value} (no {regex}: only string values are converted to regular expressions) ) unbound_value_re_expression: ClassVar[Optional[str]] = ( None # Expression for regular expression not bound to a field as format string with placeholder {value} and {flag_x} as described for re_expression @@ -1339,6 +1340,7 @@ def convert_condition_field_eq_val_str( return expr.format( field=self.escape_and_quote_field(cond.field), value=self.convert_value_str(value, state), + regex=self.convert_value_re(value.to_regex(), state), backend=self, ) except TypeError: # pragma: no cover @@ -1388,6 +1390,7 @@ def convert_condition_field_eq_val_str_case_sensitive( return expr.format( field=self.escape_and_quote_field(cond.field), value=self.convert_value_str(value, state), + regex=self.convert_value_re(value.to_regex(), state), ) except TypeError: # pragma: no cover raise NotImplementedError( @@ -1563,7 +1566,8 @@ def convert_condition_val_str( ) -> Union[str, DeferredQueryExpression]: """Conversion of value-only strings.""" return self.unbound_value_str_expression.format( - value=self.convert_value_str(cond.value, state) + value=self.convert_value_str(cond.value, state), + regex=self.convert_value_re(cond.value.to_regex(), state), ) def convert_condition_val_num( diff --git a/tests/test_conversion_base.py b/tests/test_conversion_base.py index 91f0f85..1f48342 100644 --- 
a/tests/test_conversion_base.py +++ b/tests/test_conversion_base.py @@ -503,6 +503,52 @@ def test_convert_value_str_contains_further_wildcard(test_backend): ) +def test_convert_value_str_wildcard_to_regex(test_backend, monkeypatch): + monkeypatch.setattr(test_backend, "wildcard_match_expression", '{field} match "{regex}"') + assert ( + test_backend.convert( + SigmaCollection.from_yaml( + """ + title: Test + status: test + logsource: + category: test_category + product: test_product + detection: + sel: + fieldA|contains: "va*lue" + condition: sel + """ + ) + ) + == ['mappedA match ".*va.*lue.*"'] + ) + + +def test_convert_value_str_wildcard_to_regex_cased(test_backend, monkeypatch): + monkeypatch.setattr( + test_backend, "case_sensitive_match_expression", '{field} casematch "{regex}"' + ) + assert ( + test_backend.convert( + SigmaCollection.from_yaml( + """ + title: Test + status: test + logsource: + category: test_category + product: test_product + detection: + sel: + fieldA|contains|cased: "va*lue" + condition: sel + """ + ) + ) + == ['mappedA casematch ".*va.*lue.*"'] + ) + + def test_convert_value_str_contains_expression_not_defined(test_backend, monkeypatch): monkeypatch.setattr(test_backend, "contains_expression", None) assert ( @@ -1830,6 +1876,29 @@ def test_convert_unbound_values(test_backend): ) +def test_convert_unbound_values_regex(test_backend, monkeypatch): + monkeypatch.setattr(test_backend, "unbound_value_str_expression", '_=~"{regex}"') + assert ( + test_backend.convert( + SigmaCollection.from_yaml( + """ + title: Test + status: test + logsource: + category: test_category + product: test_product + detection: + sel: + - value*1 + - value?2 + condition: sel + """ + ) + ) + == ['_=~"value.*1" or _=~"value.2"'] + ) + + def test_convert_invalid_unbound_bool(test_backend): with pytest.raises(SigmaValueError, match="Boolean values can't appear as standalone"): test_backend.convert(