From a9f58142052ae5258bc9ae859a7567187699469d Mon Sep 17 00:00:00 2001 From: cccs-kevin Date: Fri, 3 Nov 2023 12:14:39 +0000 Subject: [PATCH 1/2] Updating VSCode settings --- .vscode/settings.json | 16 +- deobs.py | 344 ++++++++++++++++++++++++------------------ 2 files changed, 198 insertions(+), 162 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 49d23d0..1d6ab98 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -18,21 +18,15 @@ "--profile=black", // "--src=${workspaceFolder}" ], - "python.formatting.autopep8Args": [ - "--max-line-length", - "120", - "--experimental" - ], - "python.formatting.provider": "autopep8", - "python.formatting.blackArgs": [ + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, + "black-formatter.args": [ "--line-length=120" ], - "python.linting.enabled": true, - "python.linting.flake8Enabled": true, - "python.linting.flake8Args": [ + "flake8.args": [ "--max-line-length=120", //Added the ignore of E203 for now : https://github.com/PyCQA/pycodestyle/issues/373 "--ignore=E203,W503" ], - "python.linting.pylintEnabled": false, } diff --git a/deobs.py b/deobs.py index 50253be..1684cdc 100644 --- a/deobs.py +++ b/deobs.py @@ -21,9 +21,10 @@ class DeobfuScripter(ServiceBase): - """ Service for deobfuscating scripts """ - FILETYPES = ['application', 'document', 'exec', 'image', 'Microsoft', 'text'] - VALIDCHARS = b' 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' + """Service for deobfuscating scripts""" + + FILETYPES = ["application", "document", "exec", "image", "Microsoft", "text"] + VALIDCHARS = b" 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" BINCHARS = bytes(list(set(range(0, 256)) - set(VALIDCHARS))) def __init__(self, config: Optional[Dict] = None) -> None: @@ -34,17 +35,17 @@ def __init__(self, config: Optional[Dict] = None) -> None: # --- Support Modules ---------------------------------------------------------------------------------------------- def printable_ratio(self, text: bytes) -> float: - """ Calcuate the ratio of printable characters to total characters in text """ + """Calcuate the ratio of printable characters to total characters in text""" return float(float(len(text.translate(None, self.BINCHARS))) / float(len(text))) @staticmethod def encode_codepoint(codepoint: int) -> bytes: - """ Returns the utf-8 encoding of a unicode codepoint """ - return chr(codepoint).encode('utf-8') + """Returns the utf-8 encoding of a unicode codepoint""" + return chr(codepoint).encode("utf-8") @staticmethod def codepoint_sub(match: regex.Match, base: int = 16) -> bytes: - """ Replace method for unicode codepoint regex substitutions. + """Replace method for unicode codepoint regex substitutions. Args: match: The regex match object with the text of the unicode codepoint value as group 1. @@ -60,54 +61,54 @@ def codepoint_sub(match: regex.Match, base: int = 16) -> bytes: @staticmethod def add1b(s: bytes, k: int) -> bytes: - """ Add k to each byte of s """ - return bytes([(c + k) & 0xff for c in s]) + """Add k to each byte of s""" + return bytes([(c + k) & 0xFF for c in s]) @staticmethod def charcode(text: bytes) -> Optional[bytes]: - """ Replace character codes with the corresponding characters """ + """Replace character codes with the corresponding characters""" # Todo: something to handle powershell bytes syntax @staticmethod def charcode_hex(text: bytes) -> Optional[bytes]: - """ Replace hex character codes with the corresponding characters """ - output = regex.sub(rb'(?i)(?:\\x|%)([a-f0-9]{2})', lambda m: binascii.unhexlify(m.group(1)), text) + """Replace hex character codes with the corresponding characters""" + output = regex.sub(rb"(?i)(?:\\x|%)([a-f0-9]{2})", lambda m: binascii.unhexlify(m.group(1)), text) return output if output != text else None # Todo: find a way to prevent charcode_oct from mangling windows filepaths with sections that start with 0-7 @staticmethod def charcode_oct(text: bytes) -> Optional[bytes]: - """ Replace octal character codes with the corresponding characters """ - output = regex.sub(rb'\\([0-7]{1,3})', partial(DeobfuScripter.codepoint_sub, base=8), text) + """Replace octal character codes with the corresponding characters""" + output = regex.sub(rb"\\([0-7]{1,3})", partial(DeobfuScripter.codepoint_sub, base=8), text) return output if output != text else None @staticmethod def charcode_unicode(text: bytes) -> Optional[bytes]: - """ Replace unicode character codes with the corresponding utf-8 byte sequence""" - output = regex.sub(rb'(?i)(?:\\u|%u)([a-f0-9]{4})', DeobfuScripter.codepoint_sub, text) + """Replace unicode character codes with the corresponding utf-8 byte sequence""" + output = regex.sub(rb"(?i)(?:\\u|%u)([a-f0-9]{4})", DeobfuScripter.codepoint_sub, text) return output if output != text else None @staticmethod def charcode_xml(text: bytes) -> Optional[bytes]: - """ Replace XML escape sequences with the corresponding character """ - output = regex.sub(rb'(?i)&#x([a-z0-9]{1,6});', DeobfuScripter.codepoint_sub, text) - output = regex.sub(rb'&#([0-9]{1,7});', partial(DeobfuScripter.codepoint_sub, base=10), output) + """Replace XML escape sequences with the corresponding character""" + output = regex.sub(rb"(?i)&#x([a-z0-9]{1,6});", DeobfuScripter.codepoint_sub, text) + output = regex.sub(rb"&#([0-9]{1,7});", partial(DeobfuScripter.codepoint_sub, base=10), output) return output if output != text else None @staticmethod def hex_constant(text: bytes) -> Optional[bytes]: - """ Replace hexadecimal integer constants with decimal ones""" - output = regex.sub(rb'(?i)\b0x([a-f0-9]{1,16})\b', lambda m: str(int(m.group(1), 16)).encode('utf-8'), text) + """Replace hexadecimal integer constants with decimal ones""" + output = regex.sub(rb"(?i)\b0x([a-f0-9]{1,16})\b", lambda m: str(int(m.group(1), 16)).encode("utf-8"), text) return output if output != text else None @staticmethod def chr_decode(text: bytes) -> Optional[bytes]: - """ Replace calls to chr with the corresponding character """ + """Replace calls to chr with the corresponding character""" output = text - for fullc, c in regex.findall(rb'(chr[bw]?\(([0-9]{1,3})\))', output, regex.I): + for fullc, c in regex.findall(rb"(chr[bw]?\(([0-9]{1,3})\))", output, regex.I): # noinspection PyBroadException try: - output = regex.sub(regex.escape(fullc), f'"{chr(int(c))}"'.encode('utf-8'), output) + output = regex.sub(regex.escape(fullc), f'"{chr(int(c))}"'.encode("utf-8"), output) except Exception: continue if output == text: @@ -116,23 +117,25 @@ def chr_decode(text: bytes) -> Optional[bytes]: @staticmethod def string_replace(text: bytes) -> Optional[bytes]: - """ Replace calls to replace() with their output """ - if b'replace(' in text.lower(): + """Replace calls to replace() with their output""" + if b"replace(" in text.lower(): # Process string with replace functions calls # Such as "SaokzueofpigxoFile".replace(/ofpigx/g, "T").replace(/okzu/g, "v") output = text # Find all occurrences of string replace (JS) - for strreplace in [o[0] for o in - regex.findall(rb'(["\'][^"\']+["\']((\.replace\([^)]+\))+))', output, flags=regex.I)]: + for strreplace in [ + o[0] for o in regex.findall(rb'(["\'][^"\']+["\']((\.replace\([^)]+\))+))', output, flags=regex.I) + ]: substitute = strreplace # Extract all substitutions - for str1, str2 in regex.findall(rb'\.replace\([/\'"]([^,]+)[/\'\"]g?\s*,\s*[\'\"]([^)]*)[\'\"]\)', - substitute, flags=regex.I): + for str1, str2 in regex.findall( + rb'\.replace\([/\'"]([^,]+)[/\'\"]g?\s*,\s*[\'\"]([^)]*)[\'\"]\)', substitute, flags=regex.I + ): # Execute the substitution substitute = substitute.replace(str1, str2) # Remove the replace calls from the layer (prevent accidental substitutions in the next step) - if b'.replace(' in substitute.lower(): - substitute = substitute[:substitute.lower().index(b'.replace(')] + if b".replace(" in substitute.lower(): + substitute = substitute[: substitute.lower().index(b".replace(")] output = output.replace(strreplace, substitute) # Process global string replace @@ -140,41 +143,45 @@ def string_replace(text: bytes) -> Optional[bytes]: for str1, str2 in replacements: output = output.replace(str1, str2) # Process VB string replace - replacements = regex.findall(rb'Replace\(\s*["\']?([^,"\']*)["\']?\s*,\s*["\']?' - rb'([^,"\']*)["\']?\s*,\s*["\']?([^,"\']*)["\']?', output) + replacements = regex.findall( + rb'Replace\(\s*["\']?([^,"\']*)["\']?\s*,\s*["\']?' rb'([^,"\']*)["\']?\s*,\s*["\']?([^,"\']*)["\']?', + output, + ) for str1, str2, str3 in replacements: output = output.replace(str1, str1.replace(str2, str3)) - output = regex.sub(rb'\.replace\(\s*/([^)]+)/g?, [\'"]([^\'"]*)[\'"]\)', b'', output) + output = regex.sub(rb'\.replace\(\s*/([^)]+)/g?, [\'"]([^\'"]*)[\'"]\)', b"", output) if output != text: return output return None def b64decode_str(self, text: bytes) -> Optional[bytes]: - """ Decode base64 """ + """Decode base64""" output = text head: bytes bmatch: bytes tail: bytes - for head, bmatch, tail in regex.findall(rb'((?:atob\()+)\'([A-Za-z0-9+/]+={0,2})\'(\)+)', text): - iters = min(len(head)//5, len(tail)) + for head, bmatch, tail in regex.findall(rb"((?:atob\()+)\'([A-Za-z0-9+/]+={0,2})\'(\)+)", text): + iters = min(len(head) // 5, len(tail)) d = bmatch for _ in range(iters): try: d = binascii.a2b_base64(d) except binascii.Error: break - output = output.replace(b'atob('*iters + b"'" + bmatch + b"'" + b')'*iters, b"'" + d + b"'") + output = output.replace(b"atob(" * iters + b"'" + bmatch + b"'" + b")" * iters, b"'" + d + b"'") - b64str: list[bytes] = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text) + b64str: list[bytes] = regex.findall(b"((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})", text) for bmatch in b64str: if bmatch not in output: continue # was already processed by atob - s = (bmatch.replace(b'\n', b'') - .replace(b'\r', b'') - .replace(b' ', b'') - .replace(b' ', b'') - .replace(b' ', b'')) + s = ( + bmatch.replace(b"\n", b"") + .replace(b"\r", b"") + .replace(b" ", b"") + .replace(b" ", b"") + .replace(b" ", b"") + ) uniq_char = set(s) if len(uniq_char) <= 6 or len(s) < 16 or len(s) % 4: continue @@ -190,10 +197,10 @@ def b64decode_str(self, text: bytes) -> Optional[bytes]: ftype = m.from_buffer(d) mag_ftype = mag.from_buffer(d) for file_type in self.FILETYPES: - if (file_type in ftype and 'octet-stream' not in ftype) or file_type in mag_ftype: + if (file_type in ftype and "octet-stream" not in ftype) or file_type in mag_ftype: b64_file_name = f"{sha256hash[0:10]}_b64_decoded" b64_file_path = os.path.join(self.working_directory, b64_file_name) - with open(b64_file_path, 'wb') as b64_file: + with open(b64_file_path, "wb") as b64_file: b64_file.write(d) self.files_extracted.add(b64_file_path) self.hashes.add(sha256hash) @@ -203,7 +210,7 @@ def b64decode_str(self, text: bytes) -> Optional[bytes]: output = output.replace(bmatch, d) else: # Test for ASCII seperated by \x00 - p = d.replace(b'\x00', b'') + p = d.replace(b"\x00", b"") if len(set(p)) > 6 and all(8 < c < 127 for c in p) and len(regex.sub(rb"\s", b"", p)) > 14: output = output.replace(bmatch, p) @@ -213,14 +220,14 @@ def b64decode_str(self, text: bytes) -> Optional[bytes]: @staticmethod def vars_of_fake_arrays(text: bytes) -> Optional[bytes]: - """ Parse variables of fake arrays """ - replacements = regex.findall(rb'var\s+([^\s=]+)\s*=\s*\[([^\]]+)\]\[(\d+)\]', text) + """Parse variables of fake arrays""" + replacements = regex.findall(rb"var\s+([^\s=]+)\s*=\s*\[([^\]]+)\]\[(\d+)\]", text) if len(replacements) > 0: # ,- Make sure we do not process these again - output = regex.sub(rb'var\s+([^=]+)\s*=', rb'XXX \1 =', text) + output = regex.sub(rb"var\s+([^=]+)\s*=", rb"XXX \1 =", text) for varname, array, pos in replacements: try: - value = regex.split(rb'\s*,\s*', array)[int(pos)] + value = regex.split(rb"\s*,\s*", array)[int(pos)] except IndexError: # print '[' + array + '][' + pos + ']' break @@ -230,19 +237,20 @@ def vars_of_fake_arrays(text: bytes) -> Optional[bytes]: return None def array_of_strings(self, text: bytes) -> Optional[bytes]: - """ Replace arrays of strings with the combined string """ + """Replace arrays of strings with the combined string""" # noinspection PyBroadException try: - replacements = regex.findall(rb'var\s+([^\s=]+)\s*=\s*\[([^\]]+)\]\s*;', text) + replacements = regex.findall(rb"var\s+([^\s=]+)\s*=\s*\[([^\]]+)\]\s*;", text) if len(replacements) > 0: # ,- Make sure we do not process these again output = text for varname, values in replacements: - occurences = [int(x) for x in regex.findall(varname + rb'\s*\[(\d+)\]', output)] + occurences = [int(x) for x in regex.findall(varname + rb"\s*\[(\d+)\]", output)] for i in occurences: try: - output = regex.sub(varname + rb'\s*\[(%d)\]' % i, - values.split(b',')[i].replace(b'\\', b'\\\\'), output) + output = regex.sub( + varname + rb"\s*\[(%d)\]" % i, values.split(b",")[i].replace(b"\\", b"\\\\"), output + ) except IndexError: # print '[' + array + '][' + pos + ']' break @@ -255,16 +263,16 @@ def array_of_strings(self, text: bytes) -> Optional[bytes]: @staticmethod def concat_strings(text: bytes) -> Optional[bytes]: - """ Concatenate disconnected strings """ + """Concatenate disconnected strings""" # Line continuation character in VB -- '_' - output = regex.sub(rb'[\'"][\s\n_]*?[+&][\s\n_]*[\'"]', b'', text) + output = regex.sub(rb'[\'"][\s\n_]*?[+&][\s\n_]*[\'"]', b"", text) if output != text: return output return None @staticmethod def str_reverse(text: bytes) -> Optional[bytes]: - """ Replace StrReverse function calls with the reverse of its argument """ + """Replace StrReverse function calls with the reverse of its argument""" output = text # VBA format StrReverse("[text]") replacements = regex.findall(rb'(StrReverse\("(.+?(?="\))))', output) @@ -277,12 +285,12 @@ def str_reverse(text: bytes) -> Optional[bytes]: @staticmethod def powershell_vars(text: bytes) -> Optional[bytes]: - """ Replace PowerShell variables with their values """ - replacements_string = regex.findall(rb'(\$(?:\w+|{[^\}]+\}))\s*=[^=]\s*[\"\']([^\"\']+)[\"\']', text) - replacements_func = regex.findall(rb'(\$(?:\w+|{[^\}]+\}))\s*=\s*([^=\"\'\s$]{3,50})[\s]', text) + """Replace PowerShell variables with their values""" + replacements_string = regex.findall(rb"(\$(?:\w+|{[^\}]+\}))\s*=[^=]\s*[\"\']([^\"\']+)[\"\']", text) + replacements_func = regex.findall(rb"(\$(?:\w+|{[^\}]+\}))\s*=\s*([^=\"\'\s$]{3,50})[\s]", text) if len(replacements_string) > 0 or len(replacements_func) > 0: # ,- Make sure we do not process these again - output = regex.sub(rb'\$((?:\w+|{[^\}]+\}))\s*=', rb'\$--\1 =', text) + output = regex.sub(rb"\$((?:\w+|{[^\}]+\}))\s*=", rb"\$--\1 =", text) for varname, string in replacements_string: output = output.replace(varname, string) for varname, string in replacements_func: @@ -294,7 +302,7 @@ def powershell_vars(text: bytes) -> Optional[bytes]: @staticmethod def powershell_carets(text: bytes) -> Optional[bytes]: - """ Remove PowerShell carets """ + """Remove PowerShell carets""" try: if b"^" in text or b"`" in text: output = text @@ -312,24 +320,25 @@ def powershell_carets(text: bytes) -> Optional[bytes]: # noinspection PyBroadException def msoffice_embedded_script_string(self, text: bytes) -> Optional[bytes]: - """ Replace variables with their values in MSOffice embedded scripts """ + """Replace variables with their values in MSOffice embedded scripts""" try: scripts: Dict[bytes, List[bytes]] = {} output = text # bad, prevent false var replacements like YG="86" # Replace regular variables replacements = regex.findall( - rb'^(\s*(\w+)\s*=\s*\w*\s*\+?\s(["\'])(.+)["\']\s*\+\s*vbCrLf\s*$)', output, regex.M) + rb'^(\s*(\w+)\s*=\s*\w*\s*\+?\s(["\'])(.+)["\']\s*\+\s*vbCrLf\s*$)', output, regex.M + ) if len(replacements) > 0: for full, variable_name, delim, value in replacements: scripts.setdefault(variable_name, []) scripts[variable_name].append(value.replace(delim + delim, delim)) - output = output.replace(full, b'') + output = output.replace(full, b"") for script_var, script_lines in scripts.items(): - new_script_name = b'new_script__' + script_var - output = regex.sub(rb'(.+)\b' + script_var + rb'\b', b'\\1' + new_script_name, output) - output += b"\n\n\n' ---- script referenced by \"" + new_script_name + b"\" ----\n\n\n" + new_script_name = b"new_script__" + script_var + output = regex.sub(rb"(.+)\b" + script_var + rb"\b", b"\\1" + new_script_name, output) + output += b"\n\n\n' ---- script referenced by \"" + new_script_name + b'" ----\n\n\n' output += b"\n".join(script_lines) if output == text: @@ -341,49 +350,66 @@ def msoffice_embedded_script_string(self, text: bytes) -> Optional[bytes]: return None def mswordmacro_vars(self, text: bytes) -> Optional[bytes]: - """ Replaces Microsoft Word variables with their values """ + """Replaces Microsoft Word variables with their values""" # noinspection PyBroadException try: output = text # prevent false var replacements like YG="86" # Replace regular variables - replacements = regex.findall(rb'^\s*((?:Const[\s]*)?(\w+)\s*=' - rb'\s*((?:["][^"]+["]|[\'][^\']+[\']|[0-9]*)))[\s\r]*$', - output, regex.MULTILINE | regex.DOTALL) + replacements = regex.findall( + rb"^\s*((?:Const[\s]*)?(\w+)\s*=" rb'\s*((?:["][^"]+["]|[\'][^\']+[\']|[0-9]*)))[\s\r]*$', + output, + regex.MULTILINE | regex.DOTALL, + ) if len(replacements) > 0: # If one variable is defined more then once take the second definition replacements = [(v[0], k, v[1]) for k, v in {i[1]: (i[0], i[2]) for i in replacements}.items()] for full, varname, value in replacements: - if len(regex.findall(rb'\b' + varname + rb'\b', output)) == 1: + if len(regex.findall(rb"\b" + varname + rb"\b", output)) == 1: # If there is only one instance of these, it's probably noise. - output = output.replace(full, b'') + output = output.replace(full, b"") else: final_val = value.replace(b'"', b"") # Stacked strings # b = "he" # b = b & "llo " # b = b & "world!" - stacked = regex.findall(rb'^\s*(' + varname + rb'\s*=\s*' - + varname + rb'\s*[+&]\s*((?:["][^"]+["]|[\'][^\']+[\'])))[\s\r]*$', - output, regex.MULTILINE | regex.DOTALL) + stacked = regex.findall( + rb"^\s*(" + + varname + + rb"\s*=\s*" + + varname + + rb'\s*[+&]\s*((?:["][^"]+["]|[\'][^\']+[\'])))[\s\r]*$', + output, + regex.MULTILINE | regex.DOTALL, + ) if len(stacked) > 0: for sfull, val in stacked: final_val += val.replace(b'"', b"") - output = output.replace(sfull, b'') - output = output.replace(full, b'') + output = output.replace(sfull, b"") + output = output.replace(full, b"") # If more than a of the variable name left, the assumption is that this did not # work according to plan, so just replace a few for now. - output = regex.sub(rb'(\b' + regex.escape(varname) + - rb'(?!\s*(?:=|[+&]\s*' + regex.escape(varname) + rb'))\b)', - b'"' + final_val.replace(b"\\", b"\\\\") + b'"', - output, count=5) + output = regex.sub( + rb"(\b" + + regex.escape(varname) + + rb"(?!\s*(?:=|[+&]\s*" + + regex.escape(varname) + + rb"))\b)", + b'"' + final_val.replace(b"\\", b"\\\\") + b'"', + output, + count=5, + ) # output = regex.sub(rb'(.*[^\s].*)\b' + varname + rb'\b', # b'\\1"' + final_val.replace(b"\\", b"\\\\") + b'"', # output) # Remaining stacked strings - replacements = regex.findall(rb'^\s*((\w+)\s*=\s*(\w+)\s*[+&]\s*((?:["][^"]+["]|[\'][^\']+[\'])))[\s\r]*$', - output, regex.MULTILINE | regex.DOTALL) + replacements = regex.findall( + rb'^\s*((\w+)\s*=\s*(\w+)\s*[+&]\s*((?:["][^"]+["]|[\'][^\']+[\'])))[\s\r]*$', + output, + regex.MULTILINE | regex.DOTALL, + ) replacements_vars = {x[1] for x in replacements} for v in replacements_vars: final_val = b"" @@ -391,11 +417,13 @@ def mswordmacro_vars(self, text: bytes) -> Optional[bytes]: if varname != v: continue final_val += value.replace(b'"', b"") - output = output.replace(full, b'') - output = regex.sub(rb'(\b' + v + - rb'(?!\s*(?:=|[+&]\s*' + v + rb'))\b)', - b'"' + final_val.replace(b"\\", b"\\\\") + b'"', - output, count=5) + output = output.replace(full, b"") + output = regex.sub( + rb"(\b" + v + rb"(?!\s*(?:=|[+&]\s*" + v + rb"))\b)", + b'"' + final_val.replace(b"\\", b"\\\\") + b'"', + output, + count=5, + ) if output == text: return None @@ -406,7 +434,7 @@ def mswordmacro_vars(self, text: bytes) -> Optional[bytes]: return None def simple_xor_function(self, text: bytes) -> Optional[bytes]: - """ Tries XORing the text with potential keys found in the text """ + """Tries XORing the text with potential keys found in the text""" xorstrings = regex.findall(rb'(\w+\("((?:[0-9A-Fa-f][0-9A-Fa-f])+)"\s*,\s*"([^"]+)"\))', text) option_a: List[Tuple[bytes, bytes, bytes, Optional[bytes]]] = [] option_b: List[Tuple[bytes, bytes, bytes, Optional[bytes]]] = [] @@ -442,30 +470,30 @@ def simple_xor_function(self, text: bytes) -> Optional[bytes]: @staticmethod def xor_with_key(s: bytes, k: bytes) -> bytes: - """ XOR s using the key k """ + """XOR s using the key k""" return bytes([a ^ b for a, b in zip(s, (len(s) // len(k) + 1) * k)]) @staticmethod def zp_xor_with_key(s: bytes, k: bytes) -> bytes: - """ XOR variant where xoring is skipped for 0 bytes and when the byte is equal to the keybyte """ + """XOR variant where xoring is skipped for 0 bytes and when the byte is equal to the keybyte""" return bytes([a if a in (0, b) else a ^ b for a, b in zip(s, (len(s) // len(k) + 1) * k)]) @staticmethod def clean_up_final_layer(text: bytes) -> bytes: - """ Remove deobfuscripter artifacts from final layer for display """ - output = regex.sub(rb'\r', b'', text) - output = regex.sub(rb']+>\n?', b'', output) + """Remove deobfuscripter artifacts from final layer for display""" + output = regex.sub(rb"\r", b"", text) + output = regex.sub(rb"]+>\n?", b"", output) return output # noinspection PyBroadException def extract_htmlscript(self, text: bytes) -> List[bytes]: - """ Extract scripts from html """ + """Extract scripts from html""" objects = [] try: - html = BeautifulSoup(text, 'lxml') - for tag_type in ['object', 'embed', 'script']: + html = BeautifulSoup(text, "lxml") + for tag_type in ["object", "embed", "script"]: for s in html.find_all(tag_type): - objects.append(str(s).encode('utf-8')) + objects.append(str(s).encode("utf-8")) except Exception as e: self.log.warning(f"Failure in extract_htmlscript function: {str(e)}") objects = [] @@ -486,51 +514,50 @@ def execute(self, request: ServiceRequest) -> None: # --- Prepare Techniques ---------------------------------------------------------------------------------- TechniqueList = List[Tuple[str, Callable[[bytes], Optional[bytes]]]] first_pass: TechniqueList = [ - ('MSOffice Embedded script', self.msoffice_embedded_script_string), - ('CHR and CHRB decode', self.chr_decode), - ('String replace', self.string_replace), - ('Powershell carets', self.powershell_carets), - ('Array of strings', self.array_of_strings), - ('Fake array vars', self.vars_of_fake_arrays), - ('Reverse strings', self.str_reverse), - ('B64 Decode', self.b64decode_str), - ('Simple XOR function', self.simple_xor_function), + ("MSOffice Embedded script", self.msoffice_embedded_script_string), + ("CHR and CHRB decode", self.chr_decode), + ("String replace", self.string_replace), + ("Powershell carets", self.powershell_carets), + ("Array of strings", self.array_of_strings), + ("Fake array vars", self.vars_of_fake_arrays), + ("Reverse strings", self.str_reverse), + ("B64 Decode", self.b64decode_str), + ("Simple XOR function", self.simple_xor_function), ] second_pass: TechniqueList = [ - ('Concat strings', self.concat_strings), - ('MSWord macro vars', self.mswordmacro_vars), - ('Powershell vars', self.powershell_vars), - ('Hex Charcodes', self.charcode_hex), + ("Concat strings", self.concat_strings), + ("MSWord macro vars", self.mswordmacro_vars), + ("Powershell vars", self.powershell_vars), + ("Hex Charcodes", self.charcode_hex), # ('Octal Charcodes', self.charcode_oct), - ('Unicode Charcodes', self.charcode_unicode), - ('XML Charcodes', self.charcode_xml), - ('Hex Int Constants', self.hex_constant), + ("Unicode Charcodes", self.charcode_unicode), + ("XML Charcodes", self.charcode_xml), + ("Hex Int Constants", self.hex_constant), ] second_pass.extend(first_pass) final_pass: TechniqueList = [] - code_extracts = [ - ('.*html.*', "HTML scripts extraction", self.extract_htmlscript) - ] + code_extracts = [(".*html.*", "HTML scripts extraction", self.extract_htmlscript)] layers_list: list[str] = [] layer = request.file_contents # --- Stage 1: Script Extraction -------------------------------------------------------------------------- - if request.file_type == 'code/ps1': + if request.file_type == "code/ps1": sig = regex.search( - rb'# SIG # Begin signature block\r\n(?:# [A-Za-z0-9+/=]+\r\n)+# SIG # End signature block', - request.file_contents) + rb"# SIG # Begin signature block\r\n(?:# [A-Za-z0-9+/=]+\r\n)+# SIG # End signature block", + request.file_contents, + ) if sig: - layer = layer[:sig.start()] + layer[sig.end():] - lines = sig.group().split(b'\r\n# ') - base64 = b''.join(line.strip() for line in lines[1:-1]) + layer = layer[: sig.start()] + layer[sig.end() :] + lines = sig.group().split(b"\r\n# ") + base64 = b"".join(line.strip() for line in lines[1:-1]) try: # Extract signature signature = binascii.a2b_base64(base64) - sig_filename = 'powershell_signature' + sig_filename = "powershell_signature" sig_path = os.path.join(self.working_directory, sig_filename) - with open(sig_path, 'wb+') as f: + with open(sig_path, "wb+") as f: f.write(signature) request.add_extracted(sig_path, sig_filename, "Powershell Signature") except binascii.Error: @@ -574,10 +601,13 @@ def execute(self, request: ServiceRequest) -> None: layer = res # --- Compiling results ----------------------------------------------------------------------------------- - if request.get_param('extract_original_iocs'): + if request.get_param("extract_original_iocs"): pat_values = patterns.ioc_match(before_deobfuscation, bogon_ip=True, just_network=False) - ioc_res = ResultSection("The following IOCs were found in the original file", parent=request.result, - body_format=BODY_FORMAT.MEMORY_DUMP) + ioc_res = ResultSection( + "The following IOCs were found in the original file", + parent=request.result, + body_format=BODY_FORMAT.MEMORY_DUMP, + ) for k, val in pat_values.items(): for v in val: if ioc_res: @@ -593,9 +623,9 @@ def execute(self, request: ServiceRequest) -> None: # Display obfuscation steps heuristic = Heuristic(1) - mres = ResultSection("De-obfuscation steps taken by DeobsfuScripter", - parent=request.result, - heuristic=heuristic) + mres = ResultSection( + "De-obfuscation steps taken by DeobsfuScripter", parent=request.result, heuristic=heuristic + ) tech_count = Counter(layers_list) for tech, count in tech_count.items(): @@ -607,8 +637,8 @@ def execute(self, request: ServiceRequest) -> None: diff_tags: Dict[str, List[bytes]] = {} for ioc_type, iocs in pat_values.items(): for ioc in iocs: - if ioc_type == 'network.static.uri': - if b'/'.join(ioc.split(b'/', 3)[:3]) not in before_deobfuscation: + if ioc_type == "network.static.uri": + if b"/".join(ioc.split(b"/", 3)[:3]) not in before_deobfuscation: diff_tags.setdefault(ioc_type, []) diff_tags[ioc_type].append(ioc) elif ioc not in before_deobfuscation: @@ -621,8 +651,8 @@ def execute(self, request: ServiceRequest) -> None: reversed_file = before_deobfuscation[::-1] for ioc_type, iocs in rev_values.items(): for ioc in iocs: - if ioc_type == 'network.static.uri': - if b'/'.join(ioc.split(b'/', 3)[:3]) not in reversed_file: + if ioc_type == "network.static.uri": + if b"/".join(ioc.split(b"/", 3)[:3]) not in reversed_file: rev_tags.setdefault(ioc_type, []) rev_tags[ioc_type].append(ioc) elif ioc not in reversed_file and ioc[::-1] not in diff_tags.get(ioc_type, []): @@ -638,21 +668,26 @@ def execute(self, request: ServiceRequest) -> None: file_path = os.path.join(self.working_directory, file_name) # Ensure directory exists before write os.makedirs(os.path.dirname(file_path), exist_ok=True) - with open(file_path, 'wb+') as f: + with open(file_path, "wb+") as f: f.write(clean) self.log.debug(f"Submitted dropped file for analysis: {file_path}") request.add_supplementary(file_path, file_name, "Final deobfuscated layer") - ResultSection(f"First {byte_count} bytes of the final layer:", body=safe_str(clean[:byte_count]), - body_format=BODY_FORMAT.MEMORY_DUMP, parent=request.result) + ResultSection( + f"First {byte_count} bytes of the final layer:", + body=safe_str(clean[:byte_count]), + body_format=BODY_FORMAT.MEMORY_DUMP, + parent=request.result, + ) # Display new IOCs from final layer if diff_tags or rev_tags: - ioc_new = ResultSection("New IOCs found after de-obfustcation", parent=request.result, - body_format=BODY_FORMAT.MEMORY_DUMP) + ioc_new = ResultSection( + "New IOCs found after de-obfustcation", parent=request.result, body_format=BODY_FORMAT.MEMORY_DUMP + ) has_network_heur = False for ty, val in chain(diff_tags.items(), rev_tags.items()): - if "network" in ty and ty != 'network.static.domain': + if "network" in ty and ty != "network.static.domain": has_network_heur = True for v in val: ioc_new.add_line(f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}") @@ -664,14 +699,21 @@ def execute(self, request: ServiceRequest) -> None: ioc_new.set_heuristic(6) if len(self.files_extracted) > 0: - ext_file_res = ResultSection("The following files were extracted during the deobfuscation", - heuristic=Heuristic(8), parent=request.result) + ext_file_res = ResultSection( + "The following files were extracted during the deobfuscation", + heuristic=Heuristic(8), + parent=request.result, + ) for extracted in self.files_extracted: file_name = os.path.basename(extracted) try: - if request.add_extracted(extracted, file_name, "File of interest deobfuscated from sample", - safelist_interface=self.api_interface): + if request.add_extracted( + extracted, + file_name, + "File of interest deobfuscated from sample", + safelist_interface=self.api_interface, + ): ext_file_res.add_line(file_name) except MaxExtractedExceeded: - self.log.warning('Extraction limit exceeded while adding files of interest.') + self.log.warning("Extraction limit exceeded while adding files of interest.") break From dae7cf9af0925f737e161ab45f77b8e58a1940e4 Mon Sep 17 00:00:00 2001 From: cccs-kevin Date: Fri, 3 Nov 2023 15:35:18 +0000 Subject: [PATCH 2/2] Adding git blame ignore revs --- .git-blame-ignore-revs | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..e1e8b15 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# Migrate code style to Black +a9f58142052ae5258bc9ae859a7567187699469d