Commit d811818 (1 parent: b5e7a87). Showing 3 changed files with 243 additions and 0 deletions.
File 1 of 3 (new file, 82 lines): converts every PCAP in a directory into a separate JSON file, one per capture.
import pyshark
import json
import binascii
from pathlib import Path
import time


def packet_to_dict(packet):
    packet_dict = {}
    for layer in packet.layers:
        if layer.layer_name == 'http':
            # Only handle HTTP requests
            if not hasattr(layer, 'request_method'):
                continue

            # Extract the request line
            if hasattr(layer, 'request_method'):
                packet_dict['method'] = layer.request_method
            if hasattr(layer, 'request_uri'):
                packet_dict['uri'] = layer.request_uri
            if hasattr(layer, 'request_version'):
                packet_dict['version'] = layer.request_version

            # Extract header fields
            headers = {}
            body = None
            excluded_fields = ['', "_ws_expert", "chat", "_ws_expert_message", "_ws_expert_severity", "_ws_expert_group", "request_line", "request_full_uri", "request", "request_number", "file_data", "data_data", "content_length_header"]
            for field_name in layer.field_names:
                if field_name not in excluded_fields:
                    header_name = field_name.replace('_', '-')
                    headers[header_name] = getattr(layer, field_name, None)
                if field_name == "file_data":
                    # Decode the hex-encoded request body (file_data is excluded from the headers dict above)
                    hex_body = layer.file_data.replace(':', '')
                    body = binascii.unhexlify(hex_body).decode('utf-8', errors='replace')
            packet_dict['headers'] = headers
            if body:
                packet_dict['body'] = body
    return packet_dict


def process_pcap_file(pcap_file):
    packets = []
    try:
        cap = pyshark.FileCapture(str(pcap_file), display_filter='http')
        for packet in cap:
            packet_dict = packet_to_dict(packet)
            if packet_dict:  # only keep packets that contain an HTTP request
                packets.append(packet_dict)
        cap.close()
    except Exception as e:
        print(f"Error processing {pcap_file}: {e}")

    return packets


def pcap_to_json(input_dir):
    # Collect all PCAP files in the directory
    pcap_files = list(Path(input_dir).glob('*.pcap'))

    pcap_num = len(pcap_files)
    i = 0
    for pcap_file in pcap_files:
        packets = process_pcap_file(pcap_file)
        output_file = pcap_file.with_suffix('.json')

        # Write to the output file
        with open(output_file, 'w') as f:
            json.dump(packets, f, indent=4)

        time.sleep(2)  # wait 2 seconds to avoid conflicts
        # Progress indicator
        i += 1
        print(f"\rProgress: {i}/{pcap_num}", end="")


def main():
    CURRENT_PATH = Path(__file__).parent
    input_dir = CURRENT_PATH / 'pcaps'  # replace with your PCAP directory

    # Run the task
    pcap_to_json(input_dir)


if __name__ == "__main__":
    main()
File 2 of 3 (new file, 65 lines): converts a single PCAP file into a JSON file.
# Convert a single PCAP file into a JSON file
import pyshark
import json
import binascii
from pathlib import Path
import hashlib


def packet_to_dict(packet):
    packet_dict = {}
    for layer in packet.layers:
        if layer.layer_name == 'http':
            # Only handle HTTP requests
            if not hasattr(layer, 'request_method'):
                continue

            # Extract the request line
            if hasattr(layer, 'request_method'):
                packet_dict['method'] = layer.request_method
            if hasattr(layer, 'request_uri'):
                packet_dict['uri'] = layer.request_uri
            if hasattr(layer, 'request_version'):
                packet_dict['version'] = layer.request_version

            # Extract header fields
            headers = {}
            body = None
            for field_name in layer.field_names:
                header_name = field_name.replace('_', '-')
                headers[header_name] = getattr(layer, field_name, None)
                if field_name == "file_data":
                    # Decode the hex-encoded request body
                    hex_body = layer.file_data.replace(':', '')
                    body = binascii.unhexlify(hex_body).decode('utf-8', errors='replace')

            # Drop fields that are not real HTTP headers
            excluded_fields = ['', "_ws_expert", "chat", "_ws_expert_message", "_ws_expert_severity", "_ws_expert_group", "request_line", "request_full_uri", "request", "request_number", "file_data", "data_data", "content_length_header", "request-method", "request-uri", "request-version", "cookie-pair"]
            for field in excluded_fields:
                headers.pop(field.replace('_', '-'), None)

            packet_dict['headers'] = headers
            if body:
                packet_dict['body'] = body
            # Compute an MD5 fingerprint of the request
            packet_dict['md5'] = hashlib.md5(json.dumps(packet_dict, sort_keys=True).encode()).hexdigest()
    return packet_dict


def pcap_to_json(input_file, output_file):
    # Read the PCAP file
    cap = pyshark.FileCapture(str(input_file), display_filter='http')

    # Parse into JSON-serializable dicts
    packets = []
    for packet in cap:
        packet_dict = packet_to_dict(packet)
        if packet_dict:  # only keep packets that contain an HTTP request
            packets.append(packet_dict)
    cap.close()

    # Write to the output file
    with open(output_file, 'w') as f:
        json.dump(packets, f, indent=4)


# Usage
CURRENT_PATH = Path(__file__).parent
input_pcap = CURRENT_PATH / 'index.pcap'
output_json = CURRENT_PATH / 'index.json'
pcap_to_json(input_pcap, output_json)
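For reference, a single entry in the resulting JSON list looks roughly like the following. The values are hypothetical (a plain GET request); the exact header keys depend on which fields tshark exposes for the capture, and the md5 value is the hash of the serialized fields above it.

[
    {
        "method": "GET",
        "uri": "/index.html",
        "version": "HTTP/1.1",
        "headers": {
            "host": "example.com",
            "user-agent": "curl/8.0.1",
            "accept": "*/*"
        },
        "md5": "<md5 of the serialized request fields>"
    }
]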
File 3 of 3 (new file, 96 lines): merges the HTTP requests from every PCAP in a directory into one JSON file.
# Convert all PCAP files into a single JSON file
import pyshark
import json
import binascii
from pathlib import Path
import time
import hashlib


def packet_to_dict(packet):
    packet_dict = {}
    for layer in packet.layers:
        if layer.layer_name == 'http':
            # Only handle HTTP requests
            if not hasattr(layer, 'request_method'):
                continue

            # Extract the request line
            if hasattr(layer, 'request_method'):
                packet_dict['method'] = layer.request_method
            if hasattr(layer, 'request_uri'):
                packet_dict['uri'] = layer.request_uri
            if hasattr(layer, 'request_version'):
                packet_dict['version'] = layer.request_version

            # Extract header fields
            headers = {}
            body = None
            for field_name in layer.field_names:
                header_name = field_name.replace('_', '-')
                headers[header_name] = getattr(layer, field_name, None)
                if field_name == "file_data":
                    # Decode the hex-encoded request body
                    hex_body = layer.file_data.replace(':', '')
                    body = binascii.unhexlify(hex_body).decode('utf-8', errors='replace')

            # Drop fields that are not real HTTP headers
            excluded_fields = ['', "_ws_expert", "chat", "_ws_expert_message", "_ws_expert_severity", "_ws_expert_group", "request_line", "request_full_uri", "request", "request_number", "file_data", "data_data", "content_length_header", "request-method", "request-uri", "request-version", "cookie-pair"]
            for field in excluded_fields:
                headers.pop(field.replace('_', '-'), None)

            packet_dict['headers'] = headers
            if body:
                packet_dict['body'] = body
            # Compute an MD5 fingerprint of the request
            packet_dict['md5'] = hashlib.md5(json.dumps(packet_dict, sort_keys=True).encode()).hexdigest()
    return packet_dict


def process_pcap_file(pcap_file):
    packets = []
    try:
        cap = pyshark.FileCapture(str(pcap_file), display_filter='http')
        for packet in cap:
            packet_dict = packet_to_dict(packet)
            if packet_dict:  # only keep packets that contain an HTTP request
                packets.append(packet_dict)
        cap.close()
    except Exception as e:
        print(f"Error processing {pcap_file}: {e}")

    return packets


def pcap_to_json(input_dir, output_file):
    # Collect all PCAP files in the directory
    pcap_files = list(Path(input_dir).glob('*.pcap'))

    all_packets = []
    pcap_num = len(pcap_files)
    i = 0
    for pcap_file in pcap_files:
        packets = process_pcap_file(pcap_file)
        # Assumes the file name (without extension) is a hex-encoded string; use pcap_file.name directly otherwise
        pcap_filename = binascii.unhexlify(pcap_file.stem).decode('utf-8', errors='replace')
        all_packets.append({
            "pcap_file": pcap_filename,
            "http_requests": packets
        })

        time.sleep(2)  # wait 2 seconds to avoid conflicts
        # Progress indicator
        i += 1
        print(f"\rProgress: {i}/{pcap_num}", end="")

    # Write to the output file
    with open(output_file, 'w') as f:
        json.dump(all_packets, f, indent=4)


def main():
    CURRENT_PATH = Path(__file__).parent
    input_dir = CURRENT_PATH / 'pcaps'  # replace with your PCAP directory
    output_json = CURRENT_PATH / 'all_http_requests.json'

    # Run the task
    pcap_to_json(input_dir, output_json)


if __name__ == "__main__":
    main()
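The aggregated file groups requests by their source capture. A hypothetical snippet of all_http_requests.json (file name, headers, and body invented for illustration):

[
    {
        "pcap_file": "login",
        "http_requests": [
            {
                "method": "POST",
                "uri": "/api/login",
                "version": "HTTP/1.1",
                "headers": {
                    "host": "example.com",
                    "content-type": "application/x-www-form-urlencoded"
                },
                "body": "user=admin&pass=admin",
                "md5": "<md5 of the serialized request fields>"
            }
        ]
    }
]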