From c5062db0fd88fcfb073c5938feeabeaca73510cb Mon Sep 17 00:00:00 2001
From: cccs-jh <63320703+cccs-jh@users.noreply.github.com>
Date: Wed, 22 Mar 2023 16:12:00 -0400
Subject: [PATCH 1/4] Reducing indentation in base64

---
 deobs.py | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/deobs.py b/deobs.py
index 78c2e0b..1c39984 100644
--- a/deobs.py
+++ b/deobs.py
@@ -147,36 +147,36 @@ def b64decode_str(self, text: bytes) -> Optional[bytes]:
             s = bmatch.replace(b'\n',
                                b'').replace(b'\r', b'').replace(b' ', b'').replace(b'&#xA;', b'').replace(b'&#10;', b'')
             uniq_char = set(s)
-            if len(uniq_char) > 6:
-                if len(s) >= 16 and len(s) % 4 == 0:
-                    try:
-                        d = binascii.a2b_base64(s)
-                    except binascii.Error:
-                        continue
+            if len(uniq_char) <= 6 or len(s) < 16 or len(s) % 4:
+                continue
+            try:
+                d = binascii.a2b_base64(s)
+            except binascii.Error:
+                continue
+            sha256hash = hashlib.sha256(d).hexdigest()
+            if sha256hash not in self.hashes:
+                if len(d) > 500:
                     m = magic.Magic(mime=True)
                     mag = magic.Magic()
                     ftype = m.from_buffer(d)
                     mag_ftype = mag.from_buffer(d)
-                    sha256hash = hashlib.sha256(d).hexdigest()
-                    if sha256hash not in self.hashes:
-                        if len(d) > 500:
-                            for file_type in self.FILETYPES:
-                                if (file_type in ftype and 'octet-stream' not in ftype) or file_type in mag_ftype:
-                                    b64_file_name = f"{sha256hash[0:10]}_b64_decoded"
-                                    b64_file_path = os.path.join(self.working_directory, b64_file_name)
-                                    with open(b64_file_path, 'wb') as b64_file:
-                                        b64_file.write(d)
-                                    self.files_extracted.add(b64_file_path)
-                                    self.hashes.add(sha256hash)
-                                    break
-
-                        if len(set(d)) > 6 and all(8 < c < 127 for c in d) and len(regex.sub(rb"\s", b"", d)) > 14:
-                            output = output.replace(bmatch, d)
-                        else:
-                            # Test for ASCII seperated by \x00
-                            p = d.replace(b'\x00', b'')
-                            if len(set(p)) > 6 and all(8 < c < 127 for c in p) and len(regex.sub(rb"\s", b"", p)) > 14:
-                                output = output.replace(bmatch, p)
+                    for file_type in self.FILETYPES:
+                        if (file_type in ftype and 'octet-stream' not in ftype) or file_type in mag_ftype:
+                            b64_file_name = f"{sha256hash[0:10]}_b64_decoded"
+                            b64_file_path = os.path.join(self.working_directory, b64_file_name)
+                            with open(b64_file_path, 'wb') as b64_file:
+                                b64_file.write(d)
+                            self.files_extracted.add(b64_file_path)
+                            self.hashes.add(sha256hash)
+                            break
+
+                if len(set(d)) > 6 and all(8 < c < 127 for c in d) and len(regex.sub(rb"\s", b"", d)) > 14:
+                    output = output.replace(bmatch, d)
+                else:
+                    # Test for ASCII separated by \x00
+                    p = d.replace(b'\x00', b'')
+                    if len(set(p)) > 6 and all(8 < c < 127 for c in p) and len(regex.sub(rb"\s", b"", p)) > 14:
+                        output = output.replace(bmatch, p)
 
         if output == text:
             return None

From 4ba7ac8da84f16c37eebecd39982954cc638cf12 Mon Sep 17 00:00:00 2001
From: cccs-jh <63320703+cccs-jh@users.noreply.github.com>
Date: Wed, 22 Mar 2023 16:39:19 -0400
Subject: [PATCH 2/4] adding typehint and better style

---
 deobs.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/deobs.py b/deobs.py
index 1c39984..e0cf46a 100644
--- a/deobs.py
+++ b/deobs.py
@@ -141,11 +141,14 @@ def string_replace(text: bytes) -> Optional[bytes]:
 
     def b64decode_str(self, text: bytes) -> Optional[bytes]:
         """ Decode base64 """
-        b64str = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text)
+        b64str: list[bytes] = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text)
         output = text
         for bmatch in b64str:
-            s = bmatch.replace(b'\n',
-                               b'').replace(b'\r', b'').replace(b' ', b'').replace(b'&#xA;', b'').replace(b'&#10;', b'')
+            s = (bmatch.replace(b'\n', b'')
+                       .replace(b'\r', b'')
+                       .replace(b' ', b'')
+                       .replace(b'&#xA;', b'')
+                       .replace(b'&#10;', b''))
             uniq_char = set(s)
             if len(uniq_char) <= 6 or len(s) < 16 or len(s) % 4:
                 continue

From 52ded58e6eedaaf9d9062da5d2b0314483b58bb3 Mon Sep 17 00:00:00 2001
From: cccs-jh <63320703+cccs-jh@users.noreply.github.com>
Date: Wed, 22 Mar 2023 19:11:37 -0400
Subject: [PATCH 3/4] Adding handling for javascript atob

---
 deobs.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/deobs.py b/deobs.py
index e0cf46a..637e2d7 100644
--- a/deobs.py
+++ b/deobs.py
@@ -141,8 +141,21 @@ def string_replace(text: bytes) -> Optional[bytes]:
 
     def b64decode_str(self, text: bytes) -> Optional[bytes]:
         """ Decode base64 """
-        b64str: list[bytes] = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text)
         output = text
+
+        head: bytes
+        bmatch: bytes
+        tail: bytes
+        for head, bmatch, tail in regex.findall(rb'((?:atob\()+)\'([A-Za-z0-9+/]={0,2})\'(\)+)', text):
+            iters = min(len(head)//5, len(tail))
+            for _ in range(iters):
+                try:
+                    d = binascii.a2b_base64(bmatch)
+                except binascii.Error:
+                    break
+            output.replace(b'atob('*iters + b"'" + bmatch + b"'" + b')'*iters, b"'" + d + b"'")
+
+        b64str: list[bytes] = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text)
         for bmatch in b64str:
             s = (bmatch.replace(b'\n', b'')
                        .replace(b'\r', b'')

From 8b1f343b9eaa812c6efbc42ddf27462b1fe8dc3d Mon Sep 17 00:00:00 2001
From: cccs-jh <63320703+cccs-jh@users.noreply.github.com>
Date: Wed, 22 Mar 2023 20:09:04 -0400
Subject: [PATCH 4/4] bugfixes

---
 deobs.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/deobs.py b/deobs.py
index 637e2d7..de1742f 100644
--- a/deobs.py
+++ b/deobs.py
@@ -146,17 +146,20 @@ def b64decode_str(self, text: bytes) -> Optional[bytes]:
         head: bytes
         bmatch: bytes
         tail: bytes
-        for head, bmatch, tail in regex.findall(rb'((?:atob\()+)\'([A-Za-z0-9+/]={0,2})\'(\)+)', text):
+        for head, bmatch, tail in regex.findall(rb'((?:atob\()+)\'([A-Za-z0-9+/]+={0,2})\'(\)+)', text):
             iters = min(len(head)//5, len(tail))
+            d = bmatch
             for _ in range(iters):
                 try:
-                    d = binascii.a2b_base64(bmatch)
+                    d = binascii.a2b_base64(d)
                 except binascii.Error:
                     break
-            output.replace(b'atob('*iters + b"'" + bmatch + b"'" + b')'*iters, b"'" + d + b"'")
+            output = output.replace(b'atob('*iters + b"'" + bmatch + b"'" + b')'*iters, b"'" + d + b"'")
 
         b64str: list[bytes] = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text)
         for bmatch in b64str:
+            if bmatch not in output:
+                continue  # was already processed by atob
             s = (bmatch.replace(b'\n', b'')
                        .replace(b'\r', b'')
                        .replace(b' ', b'')