From a9f58142052ae5258bc9ae859a7567187699469d Mon Sep 17 00:00:00 2001
From: cccs-kevin <kevin.hardy-cooper@cyber.gc.ca>
Date: Fri, 3 Nov 2023 12:14:39 +0000
Subject: [PATCH 1/2] Updating VSCode settings and reformatting deobs.py with black

---
 .vscode/settings.json |  16 +-
 deobs.py              | 344 ++++++++++++++++++++++++------------------
 2 files changed, 198 insertions(+), 162 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 49d23d0..1d6ab98 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -18,21 +18,15 @@
         "--profile=black",
         // "--src=${workspaceFolder}"
     ],
-    "python.formatting.autopep8Args": [
-        "--max-line-length",
-        "120",
-        "--experimental"
-    ],
-    "python.formatting.provider": "autopep8",
-    "python.formatting.blackArgs": [
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter"
+    },
+    "black-formatter.args": [
         "--line-length=120"
     ],
-    "python.linting.enabled": true,
-    "python.linting.flake8Enabled": true,
-    "python.linting.flake8Args": [
+    "flake8.args": [
         "--max-line-length=120",
         //Added the ignore of E203 for now : https://github.com/PyCQA/pycodestyle/issues/373
         "--ignore=E203,W503"
     ],
-    "python.linting.pylintEnabled": false,
 }
diff --git a/deobs.py b/deobs.py
index 50253be..1684cdc 100644
--- a/deobs.py
+++ b/deobs.py
@@ -21,9 +21,10 @@
 
 
 class DeobfuScripter(ServiceBase):
-    """ Service for deobfuscating scripts """
-    FILETYPES = ['application', 'document', 'exec', 'image', 'Microsoft', 'text']
-    VALIDCHARS = b' 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
+    """Service for deobfuscating scripts"""
+
+    FILETYPES = ["application", "document", "exec", "image", "Microsoft", "text"]
+    VALIDCHARS = b" 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
     BINCHARS = bytes(list(set(range(0, 256)) - set(VALIDCHARS)))
 
     def __init__(self, config: Optional[Dict] = None) -> None:
@@ -34,17 +35,17 @@ def __init__(self, config: Optional[Dict] = None) -> None:
     # --- Support Modules ----------------------------------------------------------------------------------------------
 
     def printable_ratio(self, text: bytes) -> float:
-        """ Calcuate the ratio of printable characters to total characters in text """
+        """Calculate the ratio of printable characters to total characters in text"""
         return float(float(len(text.translate(None, self.BINCHARS))) / float(len(text)))
 
     @staticmethod
     def encode_codepoint(codepoint: int) -> bytes:
-        """ Returns the utf-8 encoding of a unicode codepoint """
-        return chr(codepoint).encode('utf-8')
+        """Returns the utf-8 encoding of a unicode codepoint"""
+        return chr(codepoint).encode("utf-8")
 
     @staticmethod
     def codepoint_sub(match: regex.Match, base: int = 16) -> bytes:
-        """ Replace method for unicode codepoint regex substitutions.
+        """Replace method for unicode codepoint regex substitutions.
 
         Args:
             match: The regex match object with the text of the unicode codepoint value as group 1.
@@ -60,54 +61,54 @@ def codepoint_sub(match: regex.Match, base: int = 16) -> bytes:
 
     @staticmethod
     def add1b(s: bytes, k: int) -> bytes:
-        """ Add k to each byte of s """
-        return bytes([(c + k) & 0xff for c in s])
+        """Add k to each byte of s"""
+        return bytes([(c + k) & 0xFF for c in s])
 
     @staticmethod
     def charcode(text: bytes) -> Optional[bytes]:
-        """ Replace character codes with the corresponding characters """
+        """Replace character codes with the corresponding characters"""
         # Todo: something to handle powershell bytes syntax
 
     @staticmethod
     def charcode_hex(text: bytes) -> Optional[bytes]:
-        """ Replace hex character codes with the corresponding characters """
-        output = regex.sub(rb'(?i)(?:\\x|%)([a-f0-9]{2})', lambda m: binascii.unhexlify(m.group(1)), text)
+        """Replace hex character codes with the corresponding characters"""
+        output = regex.sub(rb"(?i)(?:\\x|%)([a-f0-9]{2})", lambda m: binascii.unhexlify(m.group(1)), text)
         return output if output != text else None
 
     # Todo: find a way to prevent charcode_oct from mangling windows filepaths with sections that start with 0-7
     @staticmethod
     def charcode_oct(text: bytes) -> Optional[bytes]:
-        """ Replace octal character codes with the corresponding characters """
-        output = regex.sub(rb'\\([0-7]{1,3})', partial(DeobfuScripter.codepoint_sub, base=8), text)
+        """Replace octal character codes with the corresponding characters"""
+        output = regex.sub(rb"\\([0-7]{1,3})", partial(DeobfuScripter.codepoint_sub, base=8), text)
         return output if output != text else None
 
     @staticmethod
     def charcode_unicode(text: bytes) -> Optional[bytes]:
-        """ Replace unicode character codes with the corresponding utf-8 byte sequence"""
-        output = regex.sub(rb'(?i)(?:\\u|%u)([a-f0-9]{4})', DeobfuScripter.codepoint_sub, text)
+        """Replace unicode character codes with the corresponding utf-8 byte sequence"""
+        output = regex.sub(rb"(?i)(?:\\u|%u)([a-f0-9]{4})", DeobfuScripter.codepoint_sub, text)
         return output if output != text else None
 
     @staticmethod
     def charcode_xml(text: bytes) -> Optional[bytes]:
-        """ Replace XML escape sequences with the corresponding character """
-        output = regex.sub(rb'(?i)&#x([a-z0-9]{1,6});', DeobfuScripter.codepoint_sub, text)
-        output = regex.sub(rb'&#([0-9]{1,7});', partial(DeobfuScripter.codepoint_sub, base=10), output)
+        """Replace XML escape sequences with the corresponding character"""
+        output = regex.sub(rb"(?i)&#x([a-z0-9]{1,6});", DeobfuScripter.codepoint_sub, text)
+        output = regex.sub(rb"&#([0-9]{1,7});", partial(DeobfuScripter.codepoint_sub, base=10), output)
         return output if output != text else None
 
     @staticmethod
     def hex_constant(text: bytes) -> Optional[bytes]:
-        """ Replace hexadecimal integer constants with decimal ones"""
-        output = regex.sub(rb'(?i)\b0x([a-f0-9]{1,16})\b', lambda m: str(int(m.group(1), 16)).encode('utf-8'), text)
+        """Replace hexadecimal integer constants with decimal ones"""
+        output = regex.sub(rb"(?i)\b0x([a-f0-9]{1,16})\b", lambda m: str(int(m.group(1), 16)).encode("utf-8"), text)
         return output if output != text else None
 
     @staticmethod
     def chr_decode(text: bytes) -> Optional[bytes]:
-        """ Replace calls to chr with the corresponding character """
+        """Replace calls to chr with the corresponding character"""
         output = text
-        for fullc, c in regex.findall(rb'(chr[bw]?\(([0-9]{1,3})\))', output, regex.I):
+        for fullc, c in regex.findall(rb"(chr[bw]?\(([0-9]{1,3})\))", output, regex.I):
             # noinspection PyBroadException
             try:
-                output = regex.sub(regex.escape(fullc), f'"{chr(int(c))}"'.encode('utf-8'), output)
+                output = regex.sub(regex.escape(fullc), f'"{chr(int(c))}"'.encode("utf-8"), output)
             except Exception:
                 continue
         if output == text:
@@ -116,23 +117,25 @@ def chr_decode(text: bytes) -> Optional[bytes]:
 
     @staticmethod
     def string_replace(text: bytes) -> Optional[bytes]:
-        """ Replace calls to replace() with their output """
-        if b'replace(' in text.lower():
+        """Replace calls to replace() with their output"""
+        if b"replace(" in text.lower():
             # Process string with replace functions calls
             # Such as "SaokzueofpigxoFile".replace(/ofpigx/g, "T").replace(/okzu/g, "v")
             output = text
             # Find all occurrences of string replace (JS)
-            for strreplace in [o[0] for o in
-                               regex.findall(rb'(["\'][^"\']+["\']((\.replace\([^)]+\))+))', output, flags=regex.I)]:
+            for strreplace in [
+                o[0] for o in regex.findall(rb'(["\'][^"\']+["\']((\.replace\([^)]+\))+))', output, flags=regex.I)
+            ]:
                 substitute = strreplace
                 # Extract all substitutions
-                for str1, str2 in regex.findall(rb'\.replace\([/\'"]([^,]+)[/\'\"]g?\s*,\s*[\'\"]([^)]*)[\'\"]\)',
-                                                substitute, flags=regex.I):
+                for str1, str2 in regex.findall(
+                    rb'\.replace\([/\'"]([^,]+)[/\'\"]g?\s*,\s*[\'\"]([^)]*)[\'\"]\)', substitute, flags=regex.I
+                ):
                     # Execute the substitution
                     substitute = substitute.replace(str1, str2)
                 # Remove the replace calls from the layer (prevent accidental substitutions in the next step)
-                if b'.replace(' in substitute.lower():
-                    substitute = substitute[:substitute.lower().index(b'.replace(')]
+                if b".replace(" in substitute.lower():
+                    substitute = substitute[: substitute.lower().index(b".replace(")]
                 output = output.replace(strreplace, substitute)
 
             # Process global string replace
@@ -140,41 +143,45 @@ def string_replace(text: bytes) -> Optional[bytes]:
             for str1, str2 in replacements:
                 output = output.replace(str1, str2)
             # Process VB string replace
-            replacements = regex.findall(rb'Replace\(\s*["\']?([^,"\']*)["\']?\s*,\s*["\']?'
-                                         rb'([^,"\']*)["\']?\s*,\s*["\']?([^,"\']*)["\']?', output)
+            replacements = regex.findall(
+                rb'Replace\(\s*["\']?([^,"\']*)["\']?\s*,\s*["\']?' rb'([^,"\']*)["\']?\s*,\s*["\']?([^,"\']*)["\']?',
+                output,
+            )
             for str1, str2, str3 in replacements:
                 output = output.replace(str1, str1.replace(str2, str3))
-            output = regex.sub(rb'\.replace\(\s*/([^)]+)/g?, [\'"]([^\'"]*)[\'"]\)', b'', output)
+            output = regex.sub(rb'\.replace\(\s*/([^)]+)/g?, [\'"]([^\'"]*)[\'"]\)', b"", output)
             if output != text:
                 return output
         return None
 
     def b64decode_str(self, text: bytes) -> Optional[bytes]:
-        """ Decode base64 """
+        """Decode base64"""
         output = text
 
         head: bytes
         bmatch: bytes
         tail: bytes
-        for head, bmatch, tail in regex.findall(rb'((?:atob\()+)\'([A-Za-z0-9+/]+={0,2})\'(\)+)', text):
-            iters = min(len(head)//5, len(tail))
+        for head, bmatch, tail in regex.findall(rb"((?:atob\()+)\'([A-Za-z0-9+/]+={0,2})\'(\)+)", text):
+            iters = min(len(head) // 5, len(tail))
             d = bmatch
             for _ in range(iters):
                 try:
                     d = binascii.a2b_base64(d)
                 except binascii.Error:
                     break
-            output = output.replace(b'atob('*iters + b"'" + bmatch + b"'" + b')'*iters, b"'" + d + b"'")
+            output = output.replace(b"atob(" * iters + b"'" + bmatch + b"'" + b")" * iters, b"'" + d + b"'")
 
-        b64str: list[bytes] = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text)
+        b64str: list[bytes] = regex.findall(b"((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})", text)
         for bmatch in b64str:
             if bmatch not in output:
                 continue  # was already processed by atob
-            s = (bmatch.replace(b'\n', b'')
-                       .replace(b'\r', b'')
-                       .replace(b' ', b'')
-                       .replace(b'&#xA;', b'')
-                       .replace(b'&#10;', b''))
+            s = (
+                bmatch.replace(b"\n", b"")
+                .replace(b"\r", b"")
+                .replace(b" ", b"")
+                .replace(b"&#xA;", b"")
+                .replace(b"&#10;", b"")
+            )
             uniq_char = set(s)
             if len(uniq_char) <= 6 or len(s) < 16 or len(s) % 4:
                 continue
@@ -190,10 +197,10 @@ def b64decode_str(self, text: bytes) -> Optional[bytes]:
                     ftype = m.from_buffer(d)
                     mag_ftype = mag.from_buffer(d)
                     for file_type in self.FILETYPES:
-                        if (file_type in ftype and 'octet-stream' not in ftype) or file_type in mag_ftype:
+                        if (file_type in ftype and "octet-stream" not in ftype) or file_type in mag_ftype:
                             b64_file_name = f"{sha256hash[0:10]}_b64_decoded"
                             b64_file_path = os.path.join(self.working_directory, b64_file_name)
-                            with open(b64_file_path, 'wb') as b64_file:
+                            with open(b64_file_path, "wb") as b64_file:
                                 b64_file.write(d)
                             self.files_extracted.add(b64_file_path)
                             self.hashes.add(sha256hash)
@@ -203,7 +210,7 @@ def b64decode_str(self, text: bytes) -> Optional[bytes]:
                     output = output.replace(bmatch, d)
                 else:
                     # Test for ASCII seperated by \x00
-                    p = d.replace(b'\x00', b'')
+                    p = d.replace(b"\x00", b"")
                     if len(set(p)) > 6 and all(8 < c < 127 for c in p) and len(regex.sub(rb"\s", b"", p)) > 14:
                         output = output.replace(bmatch, p)
 
@@ -213,14 +220,14 @@ def b64decode_str(self, text: bytes) -> Optional[bytes]:
 
     @staticmethod
     def vars_of_fake_arrays(text: bytes) -> Optional[bytes]:
-        """ Parse variables of fake arrays """
-        replacements = regex.findall(rb'var\s+([^\s=]+)\s*=\s*\[([^\]]+)\]\[(\d+)\]', text)
+        """Parse variables of fake arrays"""
+        replacements = regex.findall(rb"var\s+([^\s=]+)\s*=\s*\[([^\]]+)\]\[(\d+)\]", text)
         if len(replacements) > 0:
             #    ,- Make sure we do not process these again
-            output = regex.sub(rb'var\s+([^=]+)\s*=', rb'XXX \1 =', text)
+            output = regex.sub(rb"var\s+([^=]+)\s*=", rb"XXX \1 =", text)
             for varname, array, pos in replacements:
                 try:
-                    value = regex.split(rb'\s*,\s*', array)[int(pos)]
+                    value = regex.split(rb"\s*,\s*", array)[int(pos)]
                 except IndexError:
                     # print '[' + array + '][' + pos + ']'
                     break
@@ -230,19 +237,20 @@ def vars_of_fake_arrays(text: bytes) -> Optional[bytes]:
         return None
 
     def array_of_strings(self, text: bytes) -> Optional[bytes]:
-        """ Replace arrays of strings with the combined string """
+        """Replace arrays of strings with the combined string"""
         # noinspection PyBroadException
         try:
-            replacements = regex.findall(rb'var\s+([^\s=]+)\s*=\s*\[([^\]]+)\]\s*;', text)
+            replacements = regex.findall(rb"var\s+([^\s=]+)\s*=\s*\[([^\]]+)\]\s*;", text)
             if len(replacements) > 0:
                 #    ,- Make sure we do not process these again
                 output = text
                 for varname, values in replacements:
-                    occurences = [int(x) for x in regex.findall(varname + rb'\s*\[(\d+)\]', output)]
+                    occurences = [int(x) for x in regex.findall(varname + rb"\s*\[(\d+)\]", output)]
                     for i in occurences:
                         try:
-                            output = regex.sub(varname + rb'\s*\[(%d)\]' % i,
-                                               values.split(b',')[i].replace(b'\\', b'\\\\'), output)
+                            output = regex.sub(
+                                varname + rb"\s*\[(%d)\]" % i, values.split(b",")[i].replace(b"\\", b"\\\\"), output
+                            )
                         except IndexError:
                             # print '[' + array + '][' + pos + ']'
                             break
@@ -255,16 +263,16 @@ def array_of_strings(self, text: bytes) -> Optional[bytes]:
 
     @staticmethod
     def concat_strings(text: bytes) -> Optional[bytes]:
-        """ Concatenate disconnected strings """
+        """Concatenate disconnected strings"""
         # Line continuation character in VB -- '_'
-        output = regex.sub(rb'[\'"][\s\n_]*?[+&][\s\n_]*[\'"]', b'', text)
+        output = regex.sub(rb'[\'"][\s\n_]*?[+&][\s\n_]*[\'"]', b"", text)
         if output != text:
             return output
         return None
 
     @staticmethod
     def str_reverse(text: bytes) -> Optional[bytes]:
-        """ Replace StrReverse function calls with the reverse of its argument """
+        """Replace StrReverse function calls with the reverse of its argument"""
         output = text
         # VBA format StrReverse("[text]")
         replacements = regex.findall(rb'(StrReverse\("(.+?(?="\))))', output)
@@ -277,12 +285,12 @@ def str_reverse(text: bytes) -> Optional[bytes]:
 
     @staticmethod
     def powershell_vars(text: bytes) -> Optional[bytes]:
-        """ Replace PowerShell variables with their values """
-        replacements_string = regex.findall(rb'(\$(?:\w+|{[^\}]+\}))\s*=[^=]\s*[\"\']([^\"\']+)[\"\']', text)
-        replacements_func = regex.findall(rb'(\$(?:\w+|{[^\}]+\}))\s*=\s*([^=\"\'\s$]{3,50})[\s]', text)
+        """Replace PowerShell variables with their values"""
+        replacements_string = regex.findall(rb"(\$(?:\w+|{[^\}]+\}))\s*=[^=]\s*[\"\']([^\"\']+)[\"\']", text)
+        replacements_func = regex.findall(rb"(\$(?:\w+|{[^\}]+\}))\s*=\s*([^=\"\'\s$]{3,50})[\s]", text)
         if len(replacements_string) > 0 or len(replacements_func) > 0:
             #    ,- Make sure we do not process these again
-            output = regex.sub(rb'\$((?:\w+|{[^\}]+\}))\s*=', rb'\$--\1 =', text)
+            output = regex.sub(rb"\$((?:\w+|{[^\}]+\}))\s*=", rb"\$--\1 =", text)
             for varname, string in replacements_string:
                 output = output.replace(varname, string)
             for varname, string in replacements_func:
@@ -294,7 +302,7 @@ def powershell_vars(text: bytes) -> Optional[bytes]:
 
     @staticmethod
     def powershell_carets(text: bytes) -> Optional[bytes]:
-        """ Remove PowerShell carets """
+        """Remove PowerShell carets"""
         try:
             if b"^" in text or b"`" in text:
                 output = text
@@ -312,24 +320,25 @@ def powershell_carets(text: bytes) -> Optional[bytes]:
 
     # noinspection PyBroadException
     def msoffice_embedded_script_string(self, text: bytes) -> Optional[bytes]:
-        """ Replace variables with their values in MSOffice embedded scripts """
+        """Replace variables with their values in MSOffice embedded scripts"""
         try:
             scripts: Dict[bytes, List[bytes]] = {}
             output = text
             # bad, prevent false var replacements like YG="86"
             # Replace regular variables
             replacements = regex.findall(
-                rb'^(\s*(\w+)\s*=\s*\w*\s*\+?\s(["\'])(.+)["\']\s*\+\s*vbCrLf\s*$)', output, regex.M)
+                rb'^(\s*(\w+)\s*=\s*\w*\s*\+?\s(["\'])(.+)["\']\s*\+\s*vbCrLf\s*$)', output, regex.M
+            )
             if len(replacements) > 0:
                 for full, variable_name, delim, value in replacements:
                     scripts.setdefault(variable_name, [])
                     scripts[variable_name].append(value.replace(delim + delim, delim))
-                    output = output.replace(full, b'<deobsfuscripter:msoffice_embedded_script_string_var_assignment>')
+                    output = output.replace(full, b"<deobsfuscripter:msoffice_embedded_script_string_var_assignment>")
 
             for script_var, script_lines in scripts.items():
-                new_script_name = b'new_script__' + script_var
-                output = regex.sub(rb'(.+)\b' + script_var + rb'\b', b'\\1' + new_script_name, output)
-                output += b"\n\n\n' ---- script referenced by \"" + new_script_name + b"\" ----\n\n\n"
+                new_script_name = b"new_script__" + script_var
+                output = regex.sub(rb"(.+)\b" + script_var + rb"\b", b"\\1" + new_script_name, output)
+                output += b"\n\n\n' ---- script referenced by \"" + new_script_name + b'" ----\n\n\n'
                 output += b"\n".join(script_lines)
 
             if output == text:
@@ -341,49 +350,66 @@ def msoffice_embedded_script_string(self, text: bytes) -> Optional[bytes]:
             return None
 
     def mswordmacro_vars(self, text: bytes) -> Optional[bytes]:
-        """ Replaces Microsoft Word variables with their values """
+        """Replaces Microsoft Word variables with their values"""
         # noinspection PyBroadException
         try:
             output = text
             # prevent false var replacements like YG="86"
             # Replace regular variables
-            replacements = regex.findall(rb'^\s*((?:Const[\s]*)?(\w+)\s*='
-                                         rb'\s*((?:["][^"]+["]|[\'][^\']+[\']|[0-9]*)))[\s\r]*$',
-                                         output, regex.MULTILINE | regex.DOTALL)
+            replacements = regex.findall(
+                rb"^\s*((?:Const[\s]*)?(\w+)\s*=" rb'\s*((?:["][^"]+["]|[\'][^\']+[\']|[0-9]*)))[\s\r]*$',
+                output,
+                regex.MULTILINE | regex.DOTALL,
+            )
             if len(replacements) > 0:
                 # If one variable is defined more then once take the second definition
                 replacements = [(v[0], k, v[1]) for k, v in {i[1]: (i[0], i[2]) for i in replacements}.items()]
                 for full, varname, value in replacements:
-                    if len(regex.findall(rb'\b' + varname + rb'\b', output)) == 1:
+                    if len(regex.findall(rb"\b" + varname + rb"\b", output)) == 1:
                         # If there is only one instance of these, it's probably noise.
-                        output = output.replace(full, b'<deobsfuscripter:mswordmacro_unused_variable_assignment>')
+                        output = output.replace(full, b"<deobsfuscripter:mswordmacro_unused_variable_assignment>")
                     else:
                         final_val = value.replace(b'"', b"")
                         # Stacked strings
                         # b = "he"
                         # b = b & "llo "
                         # b = b & "world!"
-                        stacked = regex.findall(rb'^\s*(' + varname + rb'\s*=\s*'
-                                                + varname + rb'\s*[+&]\s*((?:["][^"]+["]|[\'][^\']+[\'])))[\s\r]*$',
-                                                output, regex.MULTILINE | regex.DOTALL)
+                        stacked = regex.findall(
+                            rb"^\s*("
+                            + varname
+                            + rb"\s*=\s*"
+                            + varname
+                            + rb'\s*[+&]\s*((?:["][^"]+["]|[\'][^\']+[\'])))[\s\r]*$',
+                            output,
+                            regex.MULTILINE | regex.DOTALL,
+                        )
                         if len(stacked) > 0:
                             for sfull, val in stacked:
                                 final_val += val.replace(b'"', b"")
-                                output = output.replace(sfull, b'<deobsfuscripter:mswordmacro_var_assignment>')
-                        output = output.replace(full, b'<deobsfuscripter:mswordmacro_var_assignment>')
+                                output = output.replace(sfull, b"<deobsfuscripter:mswordmacro_var_assignment>")
+                        output = output.replace(full, b"<deobsfuscripter:mswordmacro_var_assignment>")
                         # If more than a of the variable name left, the assumption is that this did not
                         # work according to plan, so just replace a few for now.
-                        output = regex.sub(rb'(\b' + regex.escape(varname) +
-                                           rb'(?!\s*(?:=|[+&]\s*' + regex.escape(varname) + rb'))\b)',
-                                           b'"' + final_val.replace(b"\\", b"\\\\") + b'"',
-                                           output, count=5)
+                        output = regex.sub(
+                            rb"(\b"
+                            + regex.escape(varname)
+                            + rb"(?!\s*(?:=|[+&]\s*"
+                            + regex.escape(varname)
+                            + rb"))\b)",
+                            b'"' + final_val.replace(b"\\", b"\\\\") + b'"',
+                            output,
+                            count=5,
+                        )
                         # output = regex.sub(rb'(.*[^\s].*)\b' + varname + rb'\b',
                         #                 b'\\1"' + final_val.replace(b"\\", b"\\\\") + b'"',
                         #                 output)
 
             # Remaining stacked strings
-            replacements = regex.findall(rb'^\s*((\w+)\s*=\s*(\w+)\s*[+&]\s*((?:["][^"]+["]|[\'][^\']+[\'])))[\s\r]*$',
-                                         output, regex.MULTILINE | regex.DOTALL)
+            replacements = regex.findall(
+                rb'^\s*((\w+)\s*=\s*(\w+)\s*[+&]\s*((?:["][^"]+["]|[\'][^\']+[\'])))[\s\r]*$',
+                output,
+                regex.MULTILINE | regex.DOTALL,
+            )
             replacements_vars = {x[1] for x in replacements}
             for v in replacements_vars:
                 final_val = b""
@@ -391,11 +417,13 @@ def mswordmacro_vars(self, text: bytes) -> Optional[bytes]:
                     if varname != v:
                         continue
                     final_val += value.replace(b'"', b"")
-                    output = output.replace(full, b'<deobsfuscripter:mswordmacro_var_assignment>')
-                output = regex.sub(rb'(\b' + v +
-                                   rb'(?!\s*(?:=|[+&]\s*' + v + rb'))\b)',
-                                   b'"' + final_val.replace(b"\\", b"\\\\") + b'"',
-                                   output, count=5)
+                    output = output.replace(full, b"<deobsfuscripter:mswordmacro_var_assignment>")
+                output = regex.sub(
+                    rb"(\b" + v + rb"(?!\s*(?:=|[+&]\s*" + v + rb"))\b)",
+                    b'"' + final_val.replace(b"\\", b"\\\\") + b'"',
+                    output,
+                    count=5,
+                )
 
             if output == text:
                 return None
@@ -406,7 +434,7 @@ def mswordmacro_vars(self, text: bytes) -> Optional[bytes]:
             return None
 
     def simple_xor_function(self, text: bytes) -> Optional[bytes]:
-        """ Tries XORing the text with potential keys found in the text """
+        """Tries XORing the text with potential keys found in the text"""
         xorstrings = regex.findall(rb'(\w+\("((?:[0-9A-Fa-f][0-9A-Fa-f])+)"\s*,\s*"([^"]+)"\))', text)
         option_a: List[Tuple[bytes, bytes, bytes, Optional[bytes]]] = []
         option_b: List[Tuple[bytes, bytes, bytes, Optional[bytes]]] = []
@@ -442,30 +470,30 @@ def simple_xor_function(self, text: bytes) -> Optional[bytes]:
 
     @staticmethod
     def xor_with_key(s: bytes, k: bytes) -> bytes:
-        """ XOR s using the key k """
+        """XOR s using the key k"""
         return bytes([a ^ b for a, b in zip(s, (len(s) // len(k) + 1) * k)])
 
     @staticmethod
     def zp_xor_with_key(s: bytes, k: bytes) -> bytes:
-        """ XOR variant where xoring is skipped for 0 bytes and when the byte is equal to the keybyte """
+        """XOR variant where xoring is skipped for 0 bytes and when the byte is equal to the keybyte"""
         return bytes([a if a in (0, b) else a ^ b for a, b in zip(s, (len(s) // len(k) + 1) * k)])
 
     @staticmethod
     def clean_up_final_layer(text: bytes) -> bytes:
-        """ Remove deobfuscripter artifacts from final layer for display """
-        output = regex.sub(rb'\r', b'', text)
-        output = regex.sub(rb'<deobsfuscripter:[^>]+>\n?', b'', output)
+        """Remove deobfuscripter artifacts from final layer for display"""
+        output = regex.sub(rb"\r", b"", text)
+        output = regex.sub(rb"<deobsfuscripter:[^>]+>\n?", b"", output)
         return output
 
     # noinspection PyBroadException
     def extract_htmlscript(self, text: bytes) -> List[bytes]:
-        """ Extract scripts from html """
+        """Extract scripts from html"""
         objects = []
         try:
-            html = BeautifulSoup(text, 'lxml')
-            for tag_type in ['object', 'embed', 'script']:
+            html = BeautifulSoup(text, "lxml")
+            for tag_type in ["object", "embed", "script"]:
                 for s in html.find_all(tag_type):
-                    objects.append(str(s).encode('utf-8'))
+                    objects.append(str(s).encode("utf-8"))
         except Exception as e:
             self.log.warning(f"Failure in extract_htmlscript function: {str(e)}")
             objects = []
@@ -486,51 +514,50 @@ def execute(self, request: ServiceRequest) -> None:
         # --- Prepare Techniques ----------------------------------------------------------------------------------
         TechniqueList = List[Tuple[str, Callable[[bytes], Optional[bytes]]]]
         first_pass: TechniqueList = [
-            ('MSOffice Embedded script', self.msoffice_embedded_script_string),
-            ('CHR and CHRB decode', self.chr_decode),
-            ('String replace', self.string_replace),
-            ('Powershell carets', self.powershell_carets),
-            ('Array of strings', self.array_of_strings),
-            ('Fake array vars', self.vars_of_fake_arrays),
-            ('Reverse strings', self.str_reverse),
-            ('B64 Decode', self.b64decode_str),
-            ('Simple XOR function', self.simple_xor_function),
+            ("MSOffice Embedded script", self.msoffice_embedded_script_string),
+            ("CHR and CHRB decode", self.chr_decode),
+            ("String replace", self.string_replace),
+            ("Powershell carets", self.powershell_carets),
+            ("Array of strings", self.array_of_strings),
+            ("Fake array vars", self.vars_of_fake_arrays),
+            ("Reverse strings", self.str_reverse),
+            ("B64 Decode", self.b64decode_str),
+            ("Simple XOR function", self.simple_xor_function),
         ]
         second_pass: TechniqueList = [
-            ('Concat strings', self.concat_strings),
-            ('MSWord macro vars', self.mswordmacro_vars),
-            ('Powershell vars', self.powershell_vars),
-            ('Hex Charcodes', self.charcode_hex),
+            ("Concat strings", self.concat_strings),
+            ("MSWord macro vars", self.mswordmacro_vars),
+            ("Powershell vars", self.powershell_vars),
+            ("Hex Charcodes", self.charcode_hex),
             # ('Octal Charcodes', self.charcode_oct),
-            ('Unicode Charcodes', self.charcode_unicode),
-            ('XML Charcodes', self.charcode_xml),
-            ('Hex Int Constants', self.hex_constant),
+            ("Unicode Charcodes", self.charcode_unicode),
+            ("XML Charcodes", self.charcode_xml),
+            ("Hex Int Constants", self.hex_constant),
         ]
         second_pass.extend(first_pass)
         final_pass: TechniqueList = []
 
-        code_extracts = [
-            ('.*html.*', "HTML scripts extraction", self.extract_htmlscript)
-        ]
+        code_extracts = [(".*html.*", "HTML scripts extraction", self.extract_htmlscript)]
 
         layers_list: list[str] = []
         layer = request.file_contents
 
         # --- Stage 1: Script Extraction --------------------------------------------------------------------------
-        if request.file_type == 'code/ps1':
+        if request.file_type == "code/ps1":
             sig = regex.search(
-                rb'# SIG # Begin signature block\r\n(?:# [A-Za-z0-9+/=]+\r\n)+# SIG # End signature block',
-                request.file_contents)
+                rb"# SIG # Begin signature block\r\n(?:# [A-Za-z0-9+/=]+\r\n)+# SIG # End signature block",
+                request.file_contents,
+            )
             if sig:
-                layer = layer[:sig.start()] + layer[sig.end():]
-                lines = sig.group().split(b'\r\n# ')
-                base64 = b''.join(line.strip() for line in lines[1:-1])
+                layer = layer[: sig.start()] + layer[sig.end() :]
+                lines = sig.group().split(b"\r\n# ")
+                base64 = b"".join(line.strip() for line in lines[1:-1])
                 try:
                     # Extract signature
                     signature = binascii.a2b_base64(base64)
-                    sig_filename = 'powershell_signature'
+                    sig_filename = "powershell_signature"
                     sig_path = os.path.join(self.working_directory, sig_filename)
-                    with open(sig_path, 'wb+') as f:
+                    with open(sig_path, "wb+") as f:
                         f.write(signature)
                     request.add_extracted(sig_path, sig_filename, "Powershell Signature")
                 except binascii.Error:
@@ -574,10 +601,13 @@ def execute(self, request: ServiceRequest) -> None:
                 layer = res
 
         # --- Compiling results -----------------------------------------------------------------------------------
-        if request.get_param('extract_original_iocs'):
+        if request.get_param("extract_original_iocs"):
             pat_values = patterns.ioc_match(before_deobfuscation, bogon_ip=True, just_network=False)
-            ioc_res = ResultSection("The following IOCs were found in the original file", parent=request.result,
-                                    body_format=BODY_FORMAT.MEMORY_DUMP)
+            ioc_res = ResultSection(
+                "The following IOCs were found in the original file",
+                parent=request.result,
+                body_format=BODY_FORMAT.MEMORY_DUMP,
+            )
             for k, val in pat_values.items():
                 for v in val:
                     if ioc_res:
@@ -593,9 +623,9 @@ def execute(self, request: ServiceRequest) -> None:
 
         # Display obfuscation steps
         heuristic = Heuristic(1)
-        mres = ResultSection("De-obfuscation steps taken by DeobsfuScripter",
-                             parent=request.result,
-                             heuristic=heuristic)
+        mres = ResultSection(
+            "De-obfuscation steps taken by DeobsfuScripter", parent=request.result, heuristic=heuristic
+        )
 
         tech_count = Counter(layers_list)
         for tech, count in tech_count.items():
@@ -607,8 +637,8 @@ def execute(self, request: ServiceRequest) -> None:
         diff_tags: Dict[str, List[bytes]] = {}
         for ioc_type, iocs in pat_values.items():
             for ioc in iocs:
-                if ioc_type == 'network.static.uri':
-                    if b'/'.join(ioc.split(b'/', 3)[:3]) not in before_deobfuscation:
+                if ioc_type == "network.static.uri":
+                    if b"/".join(ioc.split(b"/", 3)[:3]) not in before_deobfuscation:
                         diff_tags.setdefault(ioc_type, [])
                         diff_tags[ioc_type].append(ioc)
                 elif ioc not in before_deobfuscation:
@@ -621,8 +651,8 @@ def execute(self, request: ServiceRequest) -> None:
         reversed_file = before_deobfuscation[::-1]
         for ioc_type, iocs in rev_values.items():
             for ioc in iocs:
-                if ioc_type == 'network.static.uri':
-                    if b'/'.join(ioc.split(b'/', 3)[:3]) not in reversed_file:
+                if ioc_type == "network.static.uri":
+                    if b"/".join(ioc.split(b"/", 3)[:3]) not in reversed_file:
                         rev_tags.setdefault(ioc_type, [])
                         rev_tags[ioc_type].append(ioc)
                 elif ioc not in reversed_file and ioc[::-1] not in diff_tags.get(ioc_type, []):
@@ -638,21 +668,26 @@ def execute(self, request: ServiceRequest) -> None:
             file_path = os.path.join(self.working_directory, file_name)
             # Ensure directory exists before write
             os.makedirs(os.path.dirname(file_path), exist_ok=True)
-            with open(file_path, 'wb+') as f:
+            with open(file_path, "wb+") as f:
                 f.write(clean)
                 self.log.debug(f"Submitted dropped file for analysis: {file_path}")
             request.add_supplementary(file_path, file_name, "Final deobfuscated layer")
 
-        ResultSection(f"First {byte_count} bytes of the final layer:", body=safe_str(clean[:byte_count]),
-                      body_format=BODY_FORMAT.MEMORY_DUMP, parent=request.result)
+        ResultSection(
+            f"First {byte_count} bytes of the final layer:",
+            body=safe_str(clean[:byte_count]),
+            body_format=BODY_FORMAT.MEMORY_DUMP,
+            parent=request.result,
+        )
 
         # Display new IOCs from final layer
         if diff_tags or rev_tags:
-            ioc_new = ResultSection("New IOCs found after de-obfustcation", parent=request.result,
-                                    body_format=BODY_FORMAT.MEMORY_DUMP)
+            ioc_new = ResultSection(
+                "New IOCs found after de-obfustcation", parent=request.result, body_format=BODY_FORMAT.MEMORY_DUMP
+            )
             has_network_heur = False
             for ty, val in chain(diff_tags.items(), rev_tags.items()):
-                if "network" in ty and ty != 'network.static.domain':
+                if "network" in ty and ty != "network.static.domain":
                     has_network_heur = True
                 for v in val:
                     ioc_new.add_line(f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}")
@@ -664,14 +699,21 @@ def execute(self, request: ServiceRequest) -> None:
                 ioc_new.set_heuristic(6)
 
         if len(self.files_extracted) > 0:
-            ext_file_res = ResultSection("The following files were extracted during the deobfuscation",
-                                         heuristic=Heuristic(8), parent=request.result)
+            ext_file_res = ResultSection(
+                "The following files were extracted during the deobfuscation",
+                heuristic=Heuristic(8),
+                parent=request.result,
+            )
             for extracted in self.files_extracted:
                 file_name = os.path.basename(extracted)
                 try:
-                    if request.add_extracted(extracted, file_name, "File of interest deobfuscated from sample",
-                                             safelist_interface=self.api_interface):
+                    if request.add_extracted(
+                        extracted,
+                        file_name,
+                        "File of interest deobfuscated from sample",
+                        safelist_interface=self.api_interface,
+                    ):
                         ext_file_res.add_line(file_name)
                 except MaxExtractedExceeded:
-                    self.log.warning('Extraction limit exceeded while adding files of interest.')
+                    self.log.warning("Extraction limit exceeded while adding files of interest.")
                     break

From dae7cf9af0925f737e161ab45f77b8e58a1940e4 Mon Sep 17 00:00:00 2001
From: cccs-kevin <kevin.hardy-cooper@cyber.gc.ca>
Date: Fri, 3 Nov 2023 15:35:18 +0000
Subject: [PATCH 2/2] Adding git blame ignore revs

---
 .git-blame-ignore-revs | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 .git-blame-ignore-revs

diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 0000000..e1e8b15
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,2 @@
+# Migrate code style to Black
+a9f58142052ae5258bc9ae859a7567187699469d