Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

show analysis flavor #1712

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions capa/ida/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def collect_metadata(rules: List[Path]):
sha256=sha256,
path=idaapi.get_input_file_path(),
),
flavor=rdoc.Flavor.STATIC,
analysis=rdoc.StaticAnalysis(
format=idaapi.get_file_type_name(),
arch=arch,
Expand Down
9 changes: 9 additions & 0 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import colorama
import tqdm.contrib.logging
from pefile import PEFormatError
from typing_extensions import assert_never
from elftools.common.exceptions import ELFError

import capa.perf
Expand Down Expand Up @@ -1022,6 +1023,13 @@ def collect_metadata(
arch = get_arch(sample_path)
os_ = get_os(sample_path) if os_ == OS_AUTO else os_

if isinstance(extractor, StaticFeatureExtractor):
flavor = rdoc.Flavor.STATIC
elif isinstance(extractor, DynamicFeatureExtractor):
flavor = rdoc.Flavor.DYNAMIC
else:
assert_never(extractor)

return rdoc.Metadata(
timestamp=datetime.datetime.now(),
version=capa.version.__version__,
Expand All @@ -1032,6 +1040,7 @@ def collect_metadata(
sha256=sha256,
path=str(Path(sample_path).resolve()),
),
flavor=flavor,
analysis=get_sample_analysis(
format_,
arch,
Expand Down
1 change: 1 addition & 0 deletions capa/render/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def render_meta(doc: rd.ResultDocument, ostream: StringIO):
(width("md5", 22), width(doc.meta.sample.md5, 82)),
("sha1", doc.meta.sample.sha1),
("sha256", doc.meta.sample.sha256),
("analysis", doc.meta.flavor),
("os", doc.meta.analysis.os),
("format", doc.meta.analysis.format),
("arch", doc.meta.analysis.arch),
Expand Down
20 changes: 20 additions & 0 deletions capa/render/proto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,23 @@ def scope_to_pb2(scope: capa.rules.Scope) -> capa_pb2.Scope.ValueType:
assert_never(scope)


def flavor_to_pb2(flavor: rd.Flavor) -> capa_pb2.Flavor.ValueType:
    """Translate a result-document flavor into its protobuf enum value.

    STATIC maps to FLAVOR_STATIC and DYNAMIC maps to FLAVOR_DYNAMIC; any other
    value is handed to assert_never.
    """
    if flavor == rd.Flavor.STATIC:
        return capa_pb2.Flavor.FLAVOR_STATIC

    if flavor == rd.Flavor.DYNAMIC:
        return capa_pb2.Flavor.FLAVOR_DYNAMIC

    # every known flavor returned above, so reaching this line means an unexpected value
    assert_never(flavor)


def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata:
assert isinstance(meta.analysis, rd.StaticAnalysis)
return capa_pb2.Metadata(
timestamp=str(meta.timestamp),
version=meta.version,
argv=meta.argv,
sample=google.protobuf.json_format.ParseDict(meta.sample.model_dump(), capa_pb2.Sample()),
flavor=flavor_to_pb2(meta.flavor),
analysis=capa_pb2.Analysis(
format=meta.analysis.format,
arch=meta.analysis.arch,
Expand Down Expand Up @@ -480,6 +490,15 @@ def scope_from_pb2(scope: capa_pb2.Scope.ValueType) -> capa.rules.Scope:
assert_never(scope)


def flavor_from_pb2(flavor: capa_pb2.Flavor.ValueType) -> rd.Flavor:
    """Translate a protobuf flavor enum value into the result-document flavor.

    FLAVOR_STATIC maps to STATIC and FLAVOR_DYNAMIC maps to DYNAMIC; any other
    value is handed to assert_never.
    """
    if flavor == capa_pb2.Flavor.FLAVOR_STATIC:
        return rd.Flavor.STATIC

    if flavor == capa_pb2.Flavor.FLAVOR_DYNAMIC:
        return rd.Flavor.DYNAMIC

    # NOTE(review): FLAVOR_UNSPECIFIED is not mapped and also ends up here;
    # confirm that is intended for messages that never had the flavor field set.
    assert_never(flavor)


def metadata_from_pb2(meta: capa_pb2.Metadata) -> rd.Metadata:
return rd.Metadata(
timestamp=datetime.datetime.fromisoformat(meta.timestamp),
Expand All @@ -491,6 +510,7 @@ def metadata_from_pb2(meta: capa_pb2.Metadata) -> rd.Metadata:
sha256=meta.sample.sha256,
path=meta.sample.path,
),
flavor=flavor_from_pb2(meta.flavor),
analysis=rd.StaticAnalysis(
format=meta.analysis.format,
arch=meta.analysis.arch,
Expand Down
7 changes: 7 additions & 0 deletions capa/render/proto/capa.proto
Original file line number Diff line number Diff line change
Expand Up @@ -192,12 +192,19 @@ message MatchFeature {
optional string description = 3;
}

// Kind of analysis, stored in Metadata.flavor.
enum Flavor {
  // zero value; presumably what a reader sees when the field was never set (TODO confirm)
  FLAVOR_UNSPECIFIED = 0;
  FLAVOR_STATIC = 1;
  FLAVOR_DYNAMIC = 2;
}

// Metadata of a result document: timestamp, version, argv, sample, analysis details and flavor.
message Metadata {
  string timestamp = 1; // iso8601 format, like: 2019-01-01T00:00:00Z
  string version = 2;
  repeated string argv = 3;
  Sample sample = 4;
  Analysis analysis = 5;
  Flavor flavor = 6; // static or dynamic, see enum Flavor
}

message MnemonicFeature {
Expand Down
3,844 changes: 3,720 additions & 124 deletions capa/render/proto/capa_pb2.py

Large diffs are not rendered by default.

22 changes: 21 additions & 1 deletion capa/render/proto/capa_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,23 @@ ADDRESSTYPE_DN_TOKEN_OFFSET: AddressType.ValueType # 5
ADDRESSTYPE_NO_ADDRESS: AddressType.ValueType # 6
global___AddressType = AddressType

# Type stubs for the `Flavor` enum (FLAVOR_UNSPECIFIED / FLAVOR_STATIC / FLAVOR_DYNAMIC).
# `_Flavor` holds the value type, `_FlavorEnumTypeWrapper` declares the members,
# and `Flavor` combines the two; the members are also declared at module level.
class _Flavor:
    ValueType = typing.NewType("ValueType", builtins.int)
    V: typing_extensions.TypeAlias = ValueType

class _FlavorEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_Flavor.ValueType], builtins.type):
    DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
    FLAVOR_UNSPECIFIED: _Flavor.ValueType # 0
    FLAVOR_STATIC: _Flavor.ValueType # 1
    FLAVOR_DYNAMIC: _Flavor.ValueType # 2

class Flavor(_Flavor, metaclass=_FlavorEnumTypeWrapper): ...

FLAVOR_UNSPECIFIED: Flavor.ValueType # 0
FLAVOR_STATIC: Flavor.ValueType # 1
FLAVOR_DYNAMIC: Flavor.ValueType # 2
global___Flavor = Flavor

class _Scope:
ValueType = typing.NewType("ValueType", builtins.int)
V: typing_extensions.TypeAlias = ValueType
Expand Down Expand Up @@ -776,6 +793,7 @@ class Metadata(google.protobuf.message.Message):
ARGV_FIELD_NUMBER: builtins.int
SAMPLE_FIELD_NUMBER: builtins.int
ANALYSIS_FIELD_NUMBER: builtins.int
FLAVOR_FIELD_NUMBER: builtins.int
timestamp: builtins.str
"""iso8601 format, like: 2019-01-01T00:00:00Z"""
version: builtins.str
Expand All @@ -785,6 +803,7 @@ class Metadata(google.protobuf.message.Message):
def sample(self) -> global___Sample: ...
@property
def analysis(self) -> global___Analysis: ...
flavor: global___Flavor.ValueType
def __init__(
self,
*,
Expand All @@ -793,9 +812,10 @@ class Metadata(google.protobuf.message.Message):
argv: collections.abc.Iterable[builtins.str] | None = ...,
sample: global___Sample | None = ...,
analysis: global___Analysis | None = ...,
flavor: global___Flavor.ValueType = ...,
) -> None: ...
def HasField(self, field_name: typing_extensions.Literal["analysis", b"analysis", "sample", b"sample"]) -> builtins.bool: ...
def ClearField(self, field_name: typing_extensions.Literal["analysis", b"analysis", "argv", b"argv", "sample", b"sample", "timestamp", b"timestamp", "version", b"version"]) -> None: ...
def ClearField(self, field_name: typing_extensions.Literal["analysis", b"analysis", "argv", b"argv", "flavor", b"flavor", "sample", b"sample", "timestamp", b"timestamp", "version", b"version"]) -> None: ...

global___Metadata = Metadata

Expand Down
7 changes: 7 additions & 0 deletions capa/render/result_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License.
import datetime
import collections
from enum import Enum
from typing import Dict, List, Tuple, Union, Literal, Optional

from pydantic import Field, BaseModel, ConfigDict
Expand Down Expand Up @@ -120,11 +121,17 @@ class DynamicAnalysis(Model):
Analysis: TypeAlias = Union[StaticAnalysis, DynamicAnalysis]


class Flavor(str, Enum):
    """Kind of analysis recorded in a result document: static or dynamic.

    The ``str`` mixin makes each member compare equal to its plain string
    value, e.g. ``Flavor.STATIC == "static"``.
    """

    STATIC = "static"
    DYNAMIC = "dynamic"

    def __str__(self) -> str:
        """Return the plain value ("static" / "dynamic")."""
        # Without this override str() of a mixed-in Enum member is "Flavor.STATIC",
        # and on Python 3.11+ format() and f-strings follow str() as well, so a
        # member placed directly into rendered output would show the member name
        # rather than the value.
        return self.value


class Metadata(Model):
    """Metadata of a result document: timestamp, version, argv, sample, flavor and analysis details."""

    timestamp: datetime.datetime
    version: str
    argv: Optional[Tuple[str, ...]]
    sample: Sample
    # kind of analysis: Flavor.STATIC or Flavor.DYNAMIC
    flavor: Flavor
    # either a StaticAnalysis or a DynamicAnalysis (the Analysis union)
    analysis: Analysis


Expand Down
13 changes: 9 additions & 4 deletions capa/render/verbose.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,8 @@ def format_address(address: frz.Address) -> str:
return f"token({capa.helpers.hex(token)})+{capa.helpers.hex(offset)}"
elif address.type == frz.AddressType.DYNAMIC:
assert isinstance(address.value, tuple)
id_, return_address = address.value
assert isinstance(id_, int)
assert isinstance(return_address, int)
return f"event: {id_}, retaddr: 0x{return_address:x}"
ppid, pid, tid, id_, return_address = address.value
return f"process ppid: {ppid}, process pid: {pid}, thread id: {tid}, call: {id_}, return address: {capa.helpers.hex(return_address)}"
elif address.type == frz.AddressType.PROCESS:
assert isinstance(address.value, tuple)
ppid, pid = address.value
Expand All @@ -71,6 +69,10 @@ def format_address(address: frz.Address) -> str:
tid = address.value
assert isinstance(tid, int)
return f"thread id: {tid}"
elif address.type == frz.AddressType.CALL:
assert isinstance(address.value, tuple)
ppid, pid, tid, id_ = address.value
return f"process ppid: {ppid}, process pid: {pid}, thread id: {tid}, call: {id_}"
elif address.type == frz.AddressType.NO_ADDRESS:
return "global"
else:
Expand All @@ -90,6 +92,7 @@ def render_static_meta(ostream, doc: rd.ResultDocument):
os windows
format pe
arch amd64
analysis static
extractor VivisectFeatureExtractor
base address 0x10000000
rules (embedded rules)
Expand All @@ -108,6 +111,7 @@ def render_static_meta(ostream, doc: rd.ResultDocument):
("os", doc.meta.analysis.os),
("format", doc.meta.analysis.format),
("arch", doc.meta.analysis.arch),
("analysis", doc.meta.flavor),
("extractor", doc.meta.analysis.extractor),
("base address", format_address(doc.meta.analysis.base_address)),
("rules", "\n".join(doc.meta.analysis.rules)),
Expand Down Expand Up @@ -152,6 +156,7 @@ def render_dynamic_meta(ostream, doc: rd.ResultDocument):
("os", doc.meta.analysis.os),
("format", doc.meta.analysis.format),
("arch", doc.meta.analysis.arch),
("analysis", doc.meta.flavor),
("extractor", doc.meta.analysis.extractor),
("rules", "\n".join(doc.meta.analysis.rules)),
("process count", len(doc.meta.analysis.feature_counts.processes)),
Expand Down
59 changes: 10 additions & 49 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
BBHandle,
CallHandle,
InsnHandle,
SampleHashes,
ThreadHandle,
ProcessHandle,
FunctionHandle,
Expand Down Expand Up @@ -364,8 +363,18 @@ def get_data_path_by_name(name) -> Path:
/ "data"
/ "dynamic"
/ "cape"
/ "v2.2"
/ "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz"
)
elif name.startswith("d46900"):
return (
CD
/ "data"
/ "dynamic"
/ "cape"
/ "v2.2"
/ "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz"
)
elif name.startswith("ea2876"):
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
else:
Expand Down Expand Up @@ -653,54 +662,6 @@ def parametrize(params, values, **kwargs):
return pytest.mark.parametrize(params, values, ids=ids, **kwargs)


EXTRACTOR_HASHING_TESTS = [
# viv extractor
(
get_viv_extractor(get_data_path_by_name("mimikatz")),
SampleHashes(
md5="5f66b82558ca92e54e77f216ef4c066c",
sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
),
),
# PE extractor
(
get_pefile_extractor(get_data_path_by_name("mimikatz")),
SampleHashes(
md5="5f66b82558ca92e54e77f216ef4c066c",
sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
),
),
# dnFile extractor
(
get_dnfile_extractor(get_data_path_by_name("b9f5b")),
SampleHashes(
md5="b9f5bd514485fb06da39beff051b9fdc",
sha1="c72a2e50410475a51d897d29ffbbaf2103754d53",
sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1",
),
),
# dotnet File
(
get_dotnetfile_extractor(get_data_path_by_name("b9f5b")),
SampleHashes(
md5="b9f5bd514485fb06da39beff051b9fdc",
sha1="c72a2e50410475a51d897d29ffbbaf2103754d53",
sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1",
),
),
# cape extractor
(
get_cape_extractor(get_data_path_by_name("0000a657")),
SampleHashes(
md5="e2147b5333879f98d515cd9aa905d489",
sha1="ad4d520fb7792b4a5701df973d6bd8a6cbfbb57f",
sha256="0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82",
),
),
]

DYNAMIC_FEATURE_PRESENCE_TESTS = sorted(
[
# file/string
Expand Down
48 changes: 42 additions & 6 deletions tests/test_extractor_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,48 @@
logger = logging.getLogger(__name__)


@fixtures.parametrize(
"extractor,hashes",
fixtures.EXTRACTOR_HASHING_TESTS,
)
def test_hash_extraction(extractor, hashes):
assert extractor.get_sample_hashes() == hashes
def test_viv_hash_extraction():
    """The viv extractor reports the expected hashes for the mimikatz sample."""
    sample_path = fixtures.get_data_path_by_name("mimikatz")
    extractor = fixtures.get_viv_extractor(sample_path)
    actual = extractor.get_sample_hashes()
    assert actual == SampleHashes(
        md5="5f66b82558ca92e54e77f216ef4c066c",
        sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
        sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
    )


def test_pefile_hash_extraction():
    """The pefile extractor reports the expected hashes for the mimikatz sample."""
    sample_path = fixtures.get_data_path_by_name("mimikatz")
    extractor = fixtures.get_pefile_extractor(sample_path)
    actual = extractor.get_sample_hashes()
    assert actual == SampleHashes(
        md5="5f66b82558ca92e54e77f216ef4c066c",
        sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
        sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
    )


def test_dnfile_hash_extraction():
    """The dnfile extractor reports the expected hashes for the b9f5b sample."""
    sample_path = fixtures.get_data_path_by_name("b9f5b")
    extractor = fixtures.get_dnfile_extractor(sample_path)
    actual = extractor.get_sample_hashes()
    assert actual == SampleHashes(
        md5="b9f5bd514485fb06da39beff051b9fdc",
        sha1="c72a2e50410475a51d897d29ffbbaf2103754d53",
        sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1",
    )


def test_dotnetfile_hash_extraction():
    """The dotnetfile extractor reports the expected hashes for the b9f5b sample."""
    sample_path = fixtures.get_data_path_by_name("b9f5b")
    extractor = fixtures.get_dotnetfile_extractor(sample_path)
    actual = extractor.get_sample_hashes()
    assert actual == SampleHashes(
        md5="b9f5bd514485fb06da39beff051b9fdc",
        sha1="c72a2e50410475a51d897d29ffbbaf2103754d53",
        sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1",
    )


def test_cape_hash_extraction():
    """The cape extractor reports the expected hashes for the 0000a657 sample."""
    sample_path = fixtures.get_data_path_by_name("0000a657")
    extractor = fixtures.get_cape_extractor(sample_path)
    actual = extractor.get_sample_hashes()
    assert actual == SampleHashes(
        md5="e2147b5333879f98d515cd9aa905d489",
        sha1="ad4d520fb7792b4a5701df973d6bd8a6cbfbb57f",
        sha256="0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82",
    )


# We need to skip the binja test if we cannot import binaryninja, e.g., in GitHub CI.
Expand Down
Loading
Loading