# parsebrologs.py
import json
from collections import OrderedDict


class ParseBroLogs(object):
    """Parses Bro (Zeek) logs and allows the log data to be output in CSV or JSON format.
    Attributes:
        filepath: Path of the Bro log file to read
        fields: Optional list of field names to keep; all fields are kept when None
    """

    def __init__(self, filepath, fields=None):
        """Initializes the class with data parsed from the file at the provided path"""
        self.data = self._read_log(filepath, fields)

    def _read_log(self, filepath, fields=None):
        """Reads and parses data from a Bro log file
        Returns:
            An OrderedDict containing the parsed header entries (e.g. 'separator',
            'fields', 'types') plus a 'data' list with one OrderedDict per record
        """
options = OrderedDict()
self.filtered_fields = fields
options['data'] = []
options['separator'] = "\t" # Set a default separator in case we don't get the separator
with open(filepath) as infile:
            for line in infile:
                if line.startswith("#separator"):
                    # The separator line is space-delimited, e.g. "#separator \x09";
                    # decode the escape so the literal separator character is stored
                    key = str(line[1:].split(" ")[0])
                    value = str.encode(line[1:].split(" ")[1].strip()).decode('unicode_escape')
                    options[key] = value
                elif line.startswith("#"):
                    # Other header lines (#fields, #types, #path, ...) are split on the
                    # separator; strip each entry to drop the trailing newline
                    key = str(line[1:].split(options.get('separator'))[0])
                    value = [v.strip() for v in line[1:].split(options.get('separator'))[1:]]
                    options[key] = value
                else:
                    data = line.split(options.get('separator'))
                    if len(data) == len(options.get("fields")):
                        record = OrderedDict()
                        for x in range(len(data)):
                            if fields is None or options.get("fields")[x] in self.filtered_fields:
                                record[options.get("fields")[x]] = data[x].strip()
                        options["data"].append(record)
                    else:
                        # Skip lines whose column count does not match the #fields header
                        pass
self.fields = options.get("fields")
return options

    def get_filtered_fields(self):
        """Returns the field filter in use
        Returns:
            A Python list of the field names kept by the filter, or None if no filter was set
        """
        return self.filtered_fields

    def get_fields(self, safe_header=False):
        """Returns all fields present in the log file (the safe_header argument is currently unused)
        Returns:
            A Python list containing all field names in the log file
        """
        return self.fields

    def to_csv(self, safe_headers=False):
        """Returns Bro log data in CSV format
        Args:
            safe_headers: If True, replaces '.' with '_' in the header names
        Returns:
            A string in CSV format containing all Bro log data
        """
delim = ","
headers = []
for v in self.data.get("fields"):
if self.filtered_fields is None or v in self.filtered_fields:
if safe_headers:
headers.append(v.strip().replace('.', '_'))
else:
headers.append(v.strip())
csv = delim.join(headers)
csv += "\n"
        data_temp = sorted(self.data.get("data"), key=lambda record: record.get("ts", ""))
        for record in data_temp:
            csv += delim.join([ v.strip() for v in record.values() ])
            csv += "\n"
        return csv[:-1]  # Remove the trailing newline

    def to_escaped_csv(self, safe_headers=False):
        """Returns Bro log data in CSV format with every field quoted, so values containing commas render correctly
        Args:
            safe_headers: If True, replaces '.' with '_' in the header names
        Returns:
            A string in CSV format containing all Bro log data, with every field quoted and escaped
        """
delim = ","
headers = []
for v in self.data.get("fields"):
if self.filtered_fields is None or v in self.filtered_fields:
if safe_headers:
headers.append("\"{}\"".format(v.strip().replace('.', '_')))
else:
headers.append("\"{}\"".format(v.strip()))
csv = delim.join(headers)
csv += "\n"
        data_temp = sorted(self.data.get("data"), key=lambda record: record.get("ts", ""))
        for record in data_temp:
            # Quote every value and escape embedded quotes by doubling them (RFC 4180 style)
            csv += delim.join([ "\"{}\"".format(v.strip().replace('"', '""')) for v in record.values() ])
            csv += "\n"
        return csv[:-1]  # Remove the trailing newline

    def to_json(self):
"""Returns Bro data as a JSON formatted string
Returns:
The log data as a JSON formatted string
"""
return json.dumps(self.data.get('data'))

    def __str__(self):
return json.dumps(self.data)
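

# --- Usage sketch (illustrative, not part of the library) ---
# A minimal example assuming a Zeek/Bro connection log named "conn.log" exists in
# the current directory; the filename and the chosen field names are assumptions.
if __name__ == "__main__":
    log = ParseBroLogs("conn.log", fields=["ts", "id.orig_h", "id.resp_h", "id.resp_p"])
    print(log.get_fields())   # every field name found in the log's #fields header
    print(log.to_csv())       # filtered fields as CSV, records sorted by timestamp
    with open("conn.json", "w") as outfile:
        outfile.write(log.to_json())  # the same records as a JSON array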