From c5062db0fd88fcfb073c5938feeabeaca73510cb Mon Sep 17 00:00:00 2001 From: cccs-jh <63320703+cccs-jh@users.noreply.github.com> Date: Wed, 22 Mar 2023 16:12:00 -0400 Subject: [PATCH 1/4] Reducing indentation in base64 --- deobs.py | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/deobs.py b/deobs.py index 78c2e0b..1c39984 100644 --- a/deobs.py +++ b/deobs.py @@ -147,36 +147,36 @@ def b64decode_str(self, text: bytes) -> Optional[bytes]: s = bmatch.replace(b'\n', b'').replace(b'\r', b'').replace(b' ', b'').replace(b' ', b'').replace(b' ', b'') uniq_char = set(s) - if len(uniq_char) > 6: - if len(s) >= 16 and len(s) % 4 == 0: - try: - d = binascii.a2b_base64(s) - except binascii.Error: - continue + if len(uniq_char) <= 6 or len(s) < 16 or len(s) % 4: + continue + try: + d = binascii.a2b_base64(s) + except binascii.Error: + continue + sha256hash = hashlib.sha256(d).hexdigest() + if sha256hash not in self.hashes: + if len(d) > 500: m = magic.Magic(mime=True) mag = magic.Magic() ftype = m.from_buffer(d) mag_ftype = mag.from_buffer(d) - sha256hash = hashlib.sha256(d).hexdigest() - if sha256hash not in self.hashes: - if len(d) > 500: - for file_type in self.FILETYPES: - if (file_type in ftype and 'octet-stream' not in ftype) or file_type in mag_ftype: - b64_file_name = f"{sha256hash[0:10]}_b64_decoded" - b64_file_path = os.path.join(self.working_directory, b64_file_name) - with open(b64_file_path, 'wb') as b64_file: - b64_file.write(d) - self.files_extracted.add(b64_file_path) - self.hashes.add(sha256hash) - break - - if len(set(d)) > 6 and all(8 < c < 127 for c in d) and len(regex.sub(rb"\s", b"", d)) > 14: - output = output.replace(bmatch, d) - else: - # Test for ASCII seperated by \x00 - p = d.replace(b'\x00', b'') - if len(set(p)) > 6 and all(8 < c < 127 for c in p) and len(regex.sub(rb"\s", b"", p)) > 14: - output = output.replace(bmatch, p) + for file_type in self.FILETYPES: + if (file_type in ftype and 'octet-stream' not in ftype) or file_type in mag_ftype: + b64_file_name = f"{sha256hash[0:10]}_b64_decoded" + b64_file_path = os.path.join(self.working_directory, b64_file_name) + with open(b64_file_path, 'wb') as b64_file: + b64_file.write(d) + self.files_extracted.add(b64_file_path) + self.hashes.add(sha256hash) + break + + if len(set(d)) > 6 and all(8 < c < 127 for c in d) and len(regex.sub(rb"\s", b"", d)) > 14: + output = output.replace(bmatch, d) + else: + # Test for ASCII seperated by \x00 + p = d.replace(b'\x00', b'') + if len(set(p)) > 6 and all(8 < c < 127 for c in p) and len(regex.sub(rb"\s", b"", p)) > 14: + output = output.replace(bmatch, p) if output == text: return None From 4ba7ac8da84f16c37eebecd39982954cc638cf12 Mon Sep 17 00:00:00 2001 From: cccs-jh <63320703+cccs-jh@users.noreply.github.com> Date: Wed, 22 Mar 2023 16:39:19 -0400 Subject: [PATCH 2/4] adding typehint and better style --- deobs.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/deobs.py b/deobs.py index 1c39984..e0cf46a 100644 --- a/deobs.py +++ b/deobs.py @@ -141,11 +141,14 @@ def string_replace(text: bytes) -> Optional[bytes]: def b64decode_str(self, text: bytes) -> Optional[bytes]: """ Decode base64 """ - b64str = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text) + b64str: list[bytes] = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text) output = text for bmatch in b64str: - s = bmatch.replace(b'\n', - b'').replace(b'\r', b'').replace(b' ', b'').replace(b' ', b'').replace(b' ', b'') + s = (bmatch.replace(b'\n', b'') + .replace(b'\r', b'') + .replace(b' ', b'') + .replace(b' ', b'') + .replace(b' ', b'')) uniq_char = set(s) if len(uniq_char) <= 6 or len(s) < 16 or len(s) % 4: continue From 52ded58e6eedaaf9d9062da5d2b0314483b58bb3 Mon Sep 17 00:00:00 2001 From: cccs-jh <63320703+cccs-jh@users.noreply.github.com> Date: Wed, 22 Mar 2023 19:11:37 -0400 Subject: [PATCH 3/4] Adding handling for javascript atob --- deobs.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/deobs.py b/deobs.py index e0cf46a..637e2d7 100644 --- a/deobs.py +++ b/deobs.py @@ -141,8 +141,21 @@ def string_replace(text: bytes) -> Optional[bytes]: def b64decode_str(self, text: bytes) -> Optional[bytes]: """ Decode base64 """ - b64str: list[bytes] = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text) output = text + + head: bytes + bmatch: bytes + tail: bytes + for head, bmatch, tail in regex.findall(rb'((?:atob\()+)\'([A-Za-z0-9+/]={0,2})\'(\)+)', text): + iters = min(len(head)//5, len(tail)) + for _ in range(iters): + try: + d = binascii.a2b_base64(bmatch) + except binascii.Error: + break + output.replace(b'atob('*iters + b"'" + bmatch + b"'" + b')'*iters, b"'" + d + b"'") + + b64str: list[bytes] = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text) for bmatch in b64str: s = (bmatch.replace(b'\n', b'') .replace(b'\r', b'') From 8b1f343b9eaa812c6efbc42ddf27462b1fe8dc3d Mon Sep 17 00:00:00 2001 From: cccs-jh <63320703+cccs-jh@users.noreply.github.com> Date: Wed, 22 Mar 2023 20:09:04 -0400 Subject: [PATCH 4/4] bugfixes --- deobs.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/deobs.py b/deobs.py index 637e2d7..de1742f 100644 --- a/deobs.py +++ b/deobs.py @@ -146,17 +146,20 @@ def b64decode_str(self, text: bytes) -> Optional[bytes]: head: bytes bmatch: bytes tail: bytes - for head, bmatch, tail in regex.findall(rb'((?:atob\()+)\'([A-Za-z0-9+/]={0,2})\'(\)+)', text): + for head, bmatch, tail in regex.findall(rb'((?:atob\()+)\'([A-Za-z0-9+/]+={0,2})\'(\)+)', text): iters = min(len(head)//5, len(tail)) + d = bmatch for _ in range(iters): try: - d = binascii.a2b_base64(bmatch) + d = binascii.a2b_base64(d) except binascii.Error: break - output.replace(b'atob('*iters + b"'" + bmatch + b"'" + b')'*iters, b"'" + d + b"'") + output = output.replace(b'atob('*iters + b"'" + bmatch + b"'" + b')'*iters, b"'" + d + b"'") b64str: list[bytes] = regex.findall(b'((?:[A-Za-z0-9+/]{3,}={0,2}(?:&#[x1][A0];)?[\r]?[\n]?){6,})', text) for bmatch in b64str: + if bmatch not in output: + continue # was already processed by atob s = (bmatch.replace(b'\n', b'') .replace(b'\r', b'') .replace(b' ', b'')