From 32bce0744664a2bbea77dd79bb786a876530dbf7 Mon Sep 17 00:00:00 2001
From: rtrimana
Date: Mon, 11 Oct 2021 21:49:36 -0700
Subject: [PATCH] Bug fix for decoding error: most likely because of non-UTF-8
 characters.

---
 network_traffic/post-processing/extract_from_tshark.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/network_traffic/post-processing/extract_from_tshark.py b/network_traffic/post-processing/extract_from_tshark.py
index 51ae6b5..a60df00 100644
--- a/network_traffic/post-processing/extract_from_tshark.py
+++ b/network_traffic/post-processing/extract_from_tshark.py
@@ -218,12 +218,12 @@ def extract_other_pkt(layers, frame_num, include_http_body=False):
 
 
 def extract_from_tshark(full_path, data, is_decrypted, include_http_body=False):
-    with open(full_path, "r") as jf:
+    with open(full_path, "rb") as jf:
         # Since certain json 'keys' appear multiple times in our data, we have to make them
         # unique first (we can't use regular json.load() or we lose some data points). From:
         # https://stackoverflow.com/questions/29321677/python-json-parser-allow-duplicate-keys
         decoder = json.JSONDecoder(object_pairs_hook=parse_object_pairs)
-        pcap_data = decoder.decode(jf.read())
+        pcap_data = decoder.decode(jf.read().decode(errors='ignore'))
 
         for packet in pcap_data:
             layers = packet[json_keys.source][json_keys.layers]