From 0fbabea67c885588c356170a83a5009d564a63e7 Mon Sep 17 00:00:00 2001 From: cccs-jh <63320703+cccs-jh@users.noreply.github.com> Date: Wed, 22 Mar 2023 21:09:07 -0400 Subject: [PATCH 1/2] Improving Charcodes - added decimal \nnn escape sequences to charcode - removed 0xhh sequences from charcode_hex to stop it mangling integer constants - created hex_constant to handle 0xhh.. integer constants --- deobs.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/deobs.py b/deobs.py index 78c2e0b..4fde1c0 100644 --- a/deobs.py +++ b/deobs.py @@ -69,12 +69,14 @@ def add1b(s: bytes, k: int) -> bytes: @staticmethod def charcode(text: bytes) -> Optional[bytes]: """ Replace character codes with the corresponding characters """ - # To do: what decimal encodings exist in scripting languages and how to decode them? + # To do: something to handle powershell bytes syntax + output = regex.sub(rb'\\(\d{1,3})', lambda m: bytes((int(m.group(1)),)), text) + return output if output != text else None @staticmethod def charcode_hex(text: bytes) -> Optional[bytes]: """ Replace hex character codes with the corresponding characters """ - output = regex.sub(rb'(?i)(?:\\x|0x|%)([a-f0-9]{2})', lambda m: binascii.unhexlify(m.group(1)), text) + output = regex.sub(rb'(?i)(?:\\x|%)([a-f0-9]{2})', lambda m: binascii.unhexlify(m.group(1)), text) return output if output != text else None @staticmethod @@ -90,6 +92,12 @@ def charcode_xml(text: bytes) -> Optional[bytes]: output = regex.sub(rb'&#([0-9]{1,7});', partial(DeobfuScripter.codepoint_sub, base=10), output) return output if output != text else None + @staticmethod + def hex_constant(text: bytes) -> Optional[bytes]: + """ Replace hexadecimal integer constants with decimal ones""" + output = regex.sub(rb'(?i)\b0x([a-f0-9]{0,16})\b', lambda m: str(int(m.group(1), 16)).encode(), text) + return output if output != text else None + @staticmethod def chr_decode(text: bytes) -> Optional[bytes]: """ Replace calls to chr 
with the corresponding character """ @@ -471,9 +479,11 @@ def execute(self, request: ServiceRequest) -> None: ('Concat strings', self.concat_strings), ('MSWord macro vars', self.mswordmacro_vars), ('Powershell vars', self.powershell_vars), + ('Charcodes', self.charcode), ('Hex Charcodes', self.charcode_hex), ('Unicode Charcodes', self.charcode_unicode), - ('XML Charcodes', self.charcode_xml) + ('XML Charcodes', self.charcode_xml), + ('Hex Int Constants', self.hex_constant), ] second_pass.extend(first_pass) final_pass: TechniqueList = [] From 25b51e13e9353da405322c795dc2d7803e06f214 Mon Sep 17 00:00:00 2001 From: cccs-jh <63320703+cccs-jh@users.noreply.github.com> Date: Fri, 24 Mar 2023 11:26:54 -0400 Subject: [PATCH 2/2] Fix charcodes errors - \nnn escapes are octal and not decimal, and potentially cause problems for Windows file paths; disable for now - hex_constant needs at least one hex character to decode. --- deobs.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/deobs.py b/deobs.py index 213a5cf..0352def 100644 --- a/deobs.py +++ b/deobs.py @@ -69,9 +69,7 @@ def add1b(s: bytes, k: int) -> bytes: @staticmethod def charcode(text: bytes) -> Optional[bytes]: """ Replace character codes with the corresponding characters """ - # To do: something to handle powershell bytes syntax - output = regex.sub(rb'\\(\d{1,3})', lambda m: bytes((int(m.group(1)),)), text) - return output if output != text else None + # Todo: something to handle powershell bytes syntax @staticmethod def charcode_hex(text: bytes) -> Optional[bytes]: @@ -79,6 +77,13 @@ def charcode_hex(text: bytes) -> Optional[bytes]: output = regex.sub(rb'(?i)(?:\\x|%)([a-f0-9]{2})', lambda m: binascii.unhexlify(m.group(1)), text) return output if output != text else None + # Todo: find a way to prevent charcode_oct from mangling windows filepaths with sections that start with 0-7 + @staticmethod + def charcode_oct(text: bytes) -> Optional[bytes]: + """ Replace octal character 
codes with the corresponding characters """ + output = regex.sub(rb'\\([0-7]{1,3})', partial(DeobfuScripter.codepoint_sub, base=8), text) + return output if output != text else None + @staticmethod def charcode_unicode(text: bytes) -> Optional[bytes]: """ Replace unicode character codes with the corresponding utf-8 byte sequence""" @@ -95,7 +100,7 @@ def charcode_xml(text: bytes) -> Optional[bytes]: @staticmethod def hex_constant(text: bytes) -> Optional[bytes]: """ Replace hexadecimal integer constants with decimal ones""" - output = regex.sub(rb'(?i)\b0x([a-f0-9]{0,16})\b', lambda m: str(int(m.group(1), 16)).encode(), text) + output = regex.sub(rb'(?i)\b0x([a-f0-9]{1,16})\b', lambda m: str(int(m.group(1), 16)).encode('utf-8'), text) return output if output != text else None @staticmethod @@ -498,8 +503,8 @@ def execute(self, request: ServiceRequest) -> None: ('Concat strings', self.concat_strings), ('MSWord macro vars', self.mswordmacro_vars), ('Powershell vars', self.powershell_vars), - ('Charcodes', self.charcode), ('Hex Charcodes', self.charcode_hex), + # ('Octal Charcodes', self.charcode_oct), ('Unicode Charcodes', self.charcode_unicode), ('XML Charcodes', self.charcode_xml), ('Hex Int Constants', self.hex_constant),