diff --git a/CHANGELOG.md b/CHANGELOG.md index 32afcb430..fef3a0fa8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ ### Bug Fixes - render: convert feature attributes to aliased dictionary for vverbose #1152 @mike-hunhoff - decouple Token dependency / extractor and features #1139 @mr-tz +- update pydantic model to guarantee type coercion #1176 @mike-hunhoff - do not overwrite version in version.py during PyInstaller build #1169 @mr-tz ### capa explorer IDA Pro plugin diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index e5c3bedb5..1ef2910a1 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -145,10 +145,13 @@ class BasicBlockFeature(HashableModel): versus right at its starting address. """ - basic_block: Address + basic_block: Address = Field(alias="basic block") address: Address feature: Feature + class Config: + allow_population_by_field_name = True + class InstructionFeature(HashableModel): """ @@ -179,7 +182,7 @@ class BasicBlockFeatures(BaseModel): class FunctionFeatures(BaseModel): address: Address features: Tuple[FunctionFeature, ...] - basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic block") + basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks") class Config: allow_population_by_field_name = True diff --git a/capa/features/freeze/features.py b/capa/features/freeze/features.py index 9182f8779..0ff21c115 100644 --- a/capa/features/freeze/features.py +++ b/capa/features/freeze/features.py @@ -340,7 +340,6 @@ class OperandOffsetFeature(FeatureModel): MnemonicFeature, OperandNumberFeature, OperandOffsetFeature, - # this has to go last because...? pydantic fails to serialize correctly otherwise. - # possibly because this feature has no associated value? + # Note! this must be last, see #1161 BasicBlockFeature, ] diff --git a/capa/ida/plugin/model.py b/capa/ida/plugin/model.py index 1a38157d6..05ac83fbc 100644 --- a/capa/ida/plugin/model.py +++ b/capa/ida/plugin/model.py @@ -365,12 +365,13 @@ def render_capa_doc_statement_node( @param doc: result doc """ - if isinstance(statement, (rd.AndStatement, rd.OrStatement, rd.OptionalStatement)): - display = statement.type - if statement.description: - display += " (%s)" % statement.description - return CapaExplorerDefaultItem(parent, display) - elif isinstance(statement, rd.NotStatement): + if isinstance(statement, rd.CompoundStatement): + if statement.type != rd.CompoundStatementType.NOT: + display = statement.type + if statement.description: + display += " (%s)" % statement.description + return CapaExplorerDefaultItem(parent, display) + elif isinstance(statement, rd.CompoundStatement) and statement.type == rd.CompoundStatementType.NOT: # TODO: do we display 'not' pass elif isinstance(statement, rd.SomeStatement): @@ -424,7 +425,7 @@ def render_capa_doc_match(self, parent: CapaExplorerDataItem, match: rd.Match, d return # optional statement with no successful children is empty - if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.OptionalStatement): + if isinstance(match.node, rd.StatementNode) and match.node.statement.type == rd.CompoundStatementType.OPTIONAL: if not any(map(lambda m: m.success, match.children)): return @@ -524,7 +525,7 @@ def capa_doc_feature_to_display(self, feature: frzf.Feature): @param feature: capa feature read from doc """ key = feature.type - value = getattr(feature, feature.type) + value = feature.dict(by_alias=True).get(feature.type) if value: if isinstance(feature, frzf.StringFeature): @@ -638,6 +639,8 @@ def render_capa_doc_feature( frzf.MnemonicFeature, frzf.NumberFeature, frzf.OffsetFeature, + frzf.OperandNumberFeature, + frzf.OperandOffsetFeature, ), ): # display instruction preview diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 1689aec51..6ae18eadb 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -124,22 +124,19 @@ def from_capa(cls, meta: Any) -> "Metadata": ) -class StatementModel(FrozenModel): - ... - - -class AndStatement(StatementModel): - type = "and" - description: Optional[str] +class CompoundStatementType: + AND = "and" + OR = "or" + NOT = "not" + OPTIONAL = "optional" -class OrStatement(StatementModel): - type = "or" - description: Optional[str] +class StatementModel(FrozenModel): + ... -class NotStatement(StatementModel): - type = "not" +class CompoundStatement(StatementModel): + type: str description: Optional[str] @@ -149,11 +146,6 @@ class SomeStatement(StatementModel): count: int -class OptionalStatement(StatementModel): - type = "optional" - description: Optional[str] - - class RangeStatement(StatementModel): type = "range" description: Optional[str] @@ -165,17 +157,15 @@ class RangeStatement(StatementModel): class SubscopeStatement(StatementModel): type = "subscope" description: Optional[str] - scope = capa.rules.Scope + scope: capa.rules.Scope Statement = Union[ - OptionalStatement, - AndStatement, - OrStatement, - NotStatement, - SomeStatement, + # Note! order matters, see #1161 RangeStatement, + SomeStatement, SubscopeStatement, + CompoundStatement, ] @@ -185,18 +175,12 @@ class StatementNode(FrozenModel): def statement_from_capa(node: capa.engine.Statement) -> Statement: - if isinstance(node, capa.engine.And): - return AndStatement(description=node.description) - - elif isinstance(node, capa.engine.Or): - return OrStatement(description=node.description) - - elif isinstance(node, capa.engine.Not): - return NotStatement(description=node.description) + if isinstance(node, (capa.engine.And, capa.engine.Or, capa.engine.Not)): + return CompoundStatement(type=node.__class__.__name__.lower(), description=node.description) elif isinstance(node, capa.engine.Some): if node.count == 0: - return OptionalStatement(description=node.description) + return CompoundStatement(type=CompoundStatementType.OPTIONAL, description=node.description) else: return SomeStatement( diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 76a836797..6411da226 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -64,7 +64,7 @@ def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0 ostream.write(" = %s" % statement.description) ostream.writeln("") - elif isinstance(statement, (rd.AndStatement, rd.OrStatement, rd.OptionalStatement, rd.NotStatement)): + elif isinstance(statement, (rd.CompoundStatement)): # emit `and:` `or:` `optional:` `not:` ostream.write(statement.type) @@ -87,7 +87,7 @@ def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0 # so, we have to inline some of the feature rendering here. child = statement.child - value = getattr(child, child.type) + value = child.dict(by_alias=True).get(child.type) if value: if isinstance(child, frzf.StringFeature): @@ -211,12 +211,12 @@ def render_match(ostream, match: rd.Match, indent=0, mode=MODE_SUCCESS): return # optional statement with no successful children is empty - if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.OptionalStatement): + if isinstance(match.node, rd.StatementNode) and match.node.statement.type == rd.CompoundStatementType.OPTIONAL: if not any(map(lambda m: m.success, match.children)): return # not statement, so invert the child mode to show failed evaluations - if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.NotStatement): + if isinstance(match.node, rd.StatementNode) and match.node.statement.type == rd.CompoundStatementType.NOT: child_mode = MODE_FAILURE elif mode == MODE_FAILURE: @@ -225,12 +225,12 @@ def render_match(ostream, match: rd.Match, indent=0, mode=MODE_SUCCESS): return # optional statement with successful children is not relevant - if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.OptionalStatement): + if isinstance(match.node, rd.StatementNode) and match.node.statement.type == rd.CompoundStatementType.OPTIONAL: if any(map(lambda m: m.success, match.children)): return # not statement, so invert the child mode to show successful evaluations - if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.NotStatement): + if isinstance(match.node, rd.StatementNode) and match.node.statement.type == rd.CompoundStatementType.NOT: child_mode = MODE_SUCCESS else: raise RuntimeError("unexpected mode: " + mode) diff --git a/tests/test_result_document.py b/tests/test_result_document.py new file mode 100644 index 000000000..8074e1cdf --- /dev/null +++ b/tests/test_result_document.py @@ -0,0 +1,200 @@ +# Copyright (C) 2020 FireEye, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +import capa +import capa.engine as ceng +import capa.render.result_document as rdoc +import capa.features.freeze.features as frzf + + +def test_optional_node_from_capa(): + node = rdoc.node_from_capa( + ceng.Some( + 0, + [], + ) + ) + assert isinstance(node.statement, rdoc.CompoundStatement) + assert node.statement.type == rdoc.CompoundStatementType.OPTIONAL + + +def test_some_node_from_capa(): + node = rdoc.node_from_capa( + ceng.Some( + 1, + [ + capa.features.insn.Number(0), + ], + ) + ) + assert isinstance(node.statement, rdoc.SomeStatement) + + +def test_range_node_from_capa(): + node = rdoc.node_from_capa( + ceng.Range( + capa.features.insn.Number(0), + ) + ) + assert isinstance(node.statement, rdoc.RangeStatement) + + +def test_subscope_node_from_capa(): + node = rdoc.node_from_capa( + ceng.Subscope( + capa.rules.Scope.BASIC_BLOCK, + capa.features.insn.Number(0), + ) + ) + assert isinstance(node.statement, rdoc.SubscopeStatement) + + +def test_and_node_from_capa(): + node = rdoc.node_from_capa( + ceng.And( + [ + capa.features.insn.Number(0), + ], + ) + ) + assert isinstance(node.statement, rdoc.CompoundStatement) + assert node.statement.type == rdoc.CompoundStatementType.AND + + +def test_or_node_from_capa(): + node = rdoc.node_from_capa( + ceng.Or( + [ + capa.features.insn.Number(0), + ], + ) + ) + assert isinstance(node.statement, rdoc.CompoundStatement) + assert node.statement.type == rdoc.CompoundStatementType.OR + + +def test_not_node_from_capa(): + node = rdoc.node_from_capa( + ceng.Not( + [ + capa.features.insn.Number(0), + ], + ) + ) + assert isinstance(node.statement, rdoc.CompoundStatement) + assert node.statement.type == rdoc.CompoundStatementType.NOT + + +def test_os_node_from_capa(): + node = rdoc.node_from_capa(capa.features.common.OS("")) + assert isinstance(node.feature, frzf.OSFeature) + + +def test_arch_node_from_capa(): + node = rdoc.node_from_capa(capa.features.common.Arch("")) + assert isinstance(node.feature, frzf.ArchFeature) + + +def test_format_node_from_capa(): + node = rdoc.node_from_capa(capa.features.common.Format("")) + assert isinstance(node.feature, frzf.FormatFeature) + + +def test_match_node_from_capa(): + node = rdoc.node_from_capa(capa.features.common.MatchedRule("")) + assert isinstance(node.feature, frzf.MatchFeature) + + +def test_characteristic_node_from_capa(): + node = rdoc.node_from_capa(capa.features.common.Characteristic("")) + assert isinstance(node.feature, frzf.CharacteristicFeature) + + +def test_substring_node_from_capa(): + node = rdoc.node_from_capa(capa.features.common.Substring("")) + assert isinstance(node.feature, frzf.SubstringFeature) + + +def test_regex_node_from_capa(): + node = rdoc.node_from_capa(capa.features.common.Regex("")) + assert isinstance(node.feature, frzf.RegexFeature) + + +def test_class_node_from_capa(): + node = rdoc.node_from_capa(capa.features.common.Class("")) + assert isinstance(node.feature, frzf.ClassFeature) + + +def test_namespace_node_from_capa(): + node = rdoc.node_from_capa(capa.features.common.Namespace("")) + assert isinstance(node.feature, frzf.NamespaceFeature) + + +def test_bytes_node_from_capa(): + node = rdoc.node_from_capa(capa.features.common.Bytes(b"")) + assert isinstance(node.feature, frzf.BytesFeature) + + +def test_export_node_from_capa(): + node = rdoc.node_from_capa(capa.features.file.Export("")) + assert isinstance(node.feature, frzf.ExportFeature) + + +def test_import_node_from_capa(): + node = rdoc.node_from_capa(capa.features.file.Import("")) + assert isinstance(node.feature, frzf.ImportFeature) + + +def test_section_node_from_capa(): + node = rdoc.node_from_capa(capa.features.file.Section("")) + assert isinstance(node.feature, frzf.SectionFeature) + + +def test_function_name_node_from_capa(): + node = rdoc.node_from_capa(capa.features.file.FunctionName("")) + assert isinstance(node.feature, frzf.FunctionNameFeature) + + +def test_api_node_from_capa(): + node = rdoc.node_from_capa(capa.features.insn.API("")) + assert isinstance(node.feature, frzf.APIFeature) + + +def test_property_node_from_capa(): + node = rdoc.node_from_capa(capa.features.insn.Property("")) + assert isinstance(node.feature, frzf.PropertyFeature) + + +def test_number_node_from_capa(): + node = rdoc.node_from_capa(capa.features.insn.Number(0)) + assert isinstance(node.feature, frzf.NumberFeature) + + +def test_offset_node_from_capa(): + node = rdoc.node_from_capa(capa.features.insn.Offset(0)) + assert isinstance(node.feature, frzf.OffsetFeature) + + +def test_mnemonic_node_from_capa(): + node = rdoc.node_from_capa(capa.features.insn.Mnemonic("")) + assert isinstance(node.feature, frzf.MnemonicFeature) + + +def test_operand_number_node_from_capa(): + node = rdoc.node_from_capa(capa.features.insn.OperandNumber(0, 0)) + assert isinstance(node.feature, frzf.OperandNumberFeature) + + +def test_operand_offset_node_from_capa(): + node = rdoc.node_from_capa(capa.features.insn.OperandOffset(0, 0)) + assert isinstance(node.feature, frzf.OperandOffsetFeature) + + +def test_basic_block_node_from_capa(): + node = rdoc.node_from_capa(capa.features.basicblock.BasicBlock("")) + assert isinstance(node.feature, frzf.BasicBlockFeature)