From 0fbabea67c885588c356170a83a5009d564a63e7 Mon Sep 17 00:00:00 2001
From: cccs-jh <63320703+cccs-jh@users.noreply.github.com>
Date: Wed, 22 Mar 2023 21:09:07 -0400
Subject: [PATCH 1/2] Improving Charcodes

- added decimal \nnn escape sequences to charcode
- removed 0xhh sequences from charcode_hex
  to stop it mangling integer constants
- created hex_constant to handle 0xhh.. integer constants
---
 deobs.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/deobs.py b/deobs.py
index 78c2e0b..4fde1c0 100644
--- a/deobs.py
+++ b/deobs.py
@@ -69,12 +69,14 @@ def add1b(s: bytes, k: int) -> bytes:
     @staticmethod
     def charcode(text: bytes) -> Optional[bytes]:
         """ Replace character codes with the corresponding characters """
-        # To do: what decimal encodings exist in scripting languages and how to decode them?
+        # To do: something to handle powershell bytes syntax
+        output = regex.sub(rb'\\(\d{1,3})', lambda m: bytes((int(m.group(1)),)), text)
+        return output if output != text else None
 
     @staticmethod
     def charcode_hex(text: bytes) -> Optional[bytes]:
         """ Replace hex character codes with the corresponding characters """
-        output = regex.sub(rb'(?i)(?:\\x|0x|%)([a-f0-9]{2})', lambda m: binascii.unhexlify(m.group(1)), text)
+        output = regex.sub(rb'(?i)(?:\\x|%)([a-f0-9]{2})', lambda m: binascii.unhexlify(m.group(1)), text)
         return output if output != text else None
 
     @staticmethod
@@ -90,6 +92,12 @@ def charcode_xml(text: bytes) -> Optional[bytes]:
         output = regex.sub(rb'&#([0-9]{1,7});', partial(DeobfuScripter.codepoint_sub, base=10), output)
         return output if output != text else None
 
+    @staticmethod
+    def hex_constant(text: bytes) -> Optional[bytes]:
+        """ Replace hexadecimal integer constants with decimal ones"""
+        output = regex.sub(rb'(?i)\b0x([a-f0-9]{0,16})\b', lambda m: str(int(m.group(1), 16)).encode(), text)
+        return output if output != text else None
+
     @staticmethod
     def chr_decode(text: bytes) -> Optional[bytes]:
         """ Replace calls to chr with the corresponding character """
@@ -471,9 +479,11 @@ def execute(self, request: ServiceRequest) -> None:
             ('Concat strings', self.concat_strings),
             ('MSWord macro vars', self.mswordmacro_vars),
             ('Powershell vars', self.powershell_vars),
+            ('Charcodes', self.charcode),
             ('Hex Charcodes', self.charcode_hex),
             ('Unicode Charcodes', self.charcode_unicode),
-            ('XML Charcodes', self.charcode_xml)
+            ('XML Charcodes', self.charcode_xml),
+            ('Hex Int Constants', self.hex_constant),
         ]
         second_pass.extend(first_pass)
         final_pass: TechniqueList = []

From 25b51e13e9353da405322c795dc2d7803e06f214 Mon Sep 17 00:00:00 2001
From: cccs-jh <63320703+cccs-jh@users.noreply.github.com>
Date: Fri, 24 Mar 2023 11:26:54 -0400
Subject: [PATCH 2/2] Fix charcodes errors

- \nnn escapes are octal, not decimal, and can potentially cause problems for
  Windows file paths; move them to charcode_oct and leave it disabled for now
- hex_constant now requires at least one hex digit after 0x to decode
  (quantifier changed from {0,16} to {1,16}).
---
 deobs.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/deobs.py b/deobs.py
index 213a5cf..0352def 100644
--- a/deobs.py
+++ b/deobs.py
@@ -69,9 +69,7 @@ def add1b(s: bytes, k: int) -> bytes:
     @staticmethod
     def charcode(text: bytes) -> Optional[bytes]:
         """ Replace character codes with the corresponding characters """
-        # To do: something to handle powershell bytes syntax
-        output = regex.sub(rb'\\(\d{1,3})', lambda m: bytes((int(m.group(1)),)), text)
-        return output if output != text else None
+        # Todo: something to handle powershell bytes syntax
 
     @staticmethod
     def charcode_hex(text: bytes) -> Optional[bytes]:
@@ -79,6 +77,13 @@ def charcode_hex(text: bytes) -> Optional[bytes]:
         output = regex.sub(rb'(?i)(?:\\x|%)([a-f0-9]{2})', lambda m: binascii.unhexlify(m.group(1)), text)
         return output if output != text else None
 
+    # Todo: find a way to prevent charcode_oct from mangling windows filepaths with sections that start with 0-7
+    @staticmethod
+    def charcode_oct(text: bytes) -> Optional[bytes]:
+        """ Replace octal character codes with the corresponding characters """
+        output = regex.sub(rb'\\([0-7]{1,3})', partial(DeobfuScripter.codepoint_sub, base=8), text)
+        return output if output != text else None
+
     @staticmethod
     def charcode_unicode(text: bytes) -> Optional[bytes]:
         """ Replace unicode character codes with the corresponding utf-8 byte sequence"""
@@ -95,7 +100,7 @@ def charcode_xml(text: bytes) -> Optional[bytes]:
     @staticmethod
     def hex_constant(text: bytes) -> Optional[bytes]:
         """ Replace hexadecimal integer constants with decimal ones"""
-        output = regex.sub(rb'(?i)\b0x([a-f0-9]{0,16})\b', lambda m: str(int(m.group(1), 16)).encode(), text)
+        output = regex.sub(rb'(?i)\b0x([a-f0-9]{1,16})\b', lambda m: str(int(m.group(1), 16)).encode('utf-8'), text)
         return output if output != text else None
 
     @staticmethod
@@ -498,8 +503,8 @@ def execute(self, request: ServiceRequest) -> None:
             ('Concat strings', self.concat_strings),
             ('MSWord macro vars', self.mswordmacro_vars),
             ('Powershell vars', self.powershell_vars),
-            ('Charcodes', self.charcode),
             ('Hex Charcodes', self.charcode_hex),
+            # ('Octal Charcodes', self.charcode_oct),
             ('Unicode Charcodes', self.charcode_unicode),
             ('XML Charcodes', self.charcode_xml),
             ('Hex Int Constants', self.hex_constant),