Skip to content

Commit

Permalink
vmray: skip non-printable strings (#2551)
Browse files Browse the repository at this point in the history
  • Loading branch information
mike-hunhoff authored Jan 8, 2025
1 parent 462e114 commit c3c9368
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

- vmray: load more analysis archives @mr-tz
- dynamic: only check file limitations for static file formats @mr-tz
- vmray: skip non-printable strings @mike-hunhoff

### capa Explorer Web

Expand Down
6 changes: 6 additions & 0 deletions capa/features/extractors/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# See the License for the specific language governing permissions and limitations under the License.

import re
import string
import contextlib
from collections import namedtuple

Expand All @@ -19,6 +20,7 @@
# matches a run of at least four units, each being one byte from the ASCII_BYTE class followed by a NUL byte
# (ASCII_BYTE is defined outside this excerpt; presumably the body of a regex character class - confirm)
UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
REPEATS = [b"A", b"\x00", b"\xfe", b"\xff"]
SLICE_SIZE = 4096
# the characters of string.printable, held as a set so is_printable_str can do membership tests against it
PRINTABLE_CHAR_SET = set(string.printable)

# an extracted string `s` paired with the `offset` at which it was matched
String = namedtuple("String", ["s", "offset"])

Expand Down Expand Up @@ -84,3 +86,7 @@ def extract_unicode_strings(buf, n=4):
for match in r.finditer(buf):
with contextlib.suppress(UnicodeDecodeError):
yield String(match.group().decode("utf-16"), match.start())


def is_printable_str(s: str) -> bool:
    """Return True when every character of *s* is in PRINTABLE_CHAR_SET.

    An empty string has no offending characters, so it is reported as printable.
    """
    return all(ch in PRINTABLE_CHAR_SET for ch in s)
9 changes: 4 additions & 5 deletions capa/features/extractors/vmray/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address
from capa.features.extractors.strings import is_printable_str
from capa.features.extractors.vmray.models import PARAM_TYPE_INT, PARAM_TYPE_STR, Param, FunctionCall, hexint
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle

Expand All @@ -27,11 +28,9 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feat
if param.deref.type_ in PARAM_TYPE_INT:
yield Number(hexint(param.deref.value)), ch.address
elif param.deref.type_ in PARAM_TYPE_STR:
# TODO(mr-tz): remove FPS like " \\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\..."
# https://github.com/mandiant/capa/issues/2432

# parsing the data up to here results in double-escaped backslashes, remove those here
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
if is_printable_str(param.deref.value):
# parsing the data up to here results in double-escaped backslashes, remove those here
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
else:
logger.debug("skipping deref param type %s", param.deref.type_)
elif param.value is not None:
Expand Down

0 comments on commit c3c9368

Please sign in to comment.