forked from MISP/misp-modules
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cuckooimport.py
executable file
·744 lines (658 loc) · 26.3 KB
/
cuckooimport.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
import json
import base64
import io
import logging
import posixpath
import stat
import tarfile
import zipfile
from pymisp import MISPEvent, MISPObject, MISPAttribute
from pymisp.tools import make_binary_objects
from collections import OrderedDict
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Generic error dictionary.
misperrors = {'error': 'Error'}

# Static metadata about the module; version() adds the 'config' key to it.
moduleinfo = {
    'version': '1.1',
    'author': 'Pierre-Jean Grenier',
    'description': (
        "Import a Cuckoo archive (zipfile or bzip2 tarball), "
        "either downloaded manually or exported from the "
        "API (/tasks/report/{task_id}/all)."
    ),
    'module-type': ['import'],
}

# This module has no server-side configuration entries.
moduleconfig = []

# Input/output description; introspection() adds the 'userConfig' key to it.
mispattributes = {
    'inputSource': ['file'],
    'output': ['MISP objects', 'malware-sample'],
    'format': 'misp_standard',
}

# Attribute types whose category may be switched to "Artifacts dropped".
ARTIFACTS_DROPPED = (
    "filename",
    "md5",
    "sha1",
    "sha256",
    "sha512",
    "malware-sample",
    "mimetype",
    "ssdeep",
)

# The same attribute types may be switched to "Payload delivery".
PAYLOAD_DELIVERY = ARTIFACTS_DROPPED
class PrettyDict(OrderedDict):
    """
    OrderedDict whose only addition is a compact, single-line `str()`.

    Each entry is rendered as "(key) value"; entries are joined with "; ".
    Values longer than MAX_SIZE characters are truncated and their key is
    suffixed with ",cut". Newlines in the rendered value are replaced by
    spaces.
    """
    # Maximum number of characters kept from each rendered value.
    MAX_SIZE = 30

    def __str__(self):
        rendered = []
        for key, value in self.items():
            text = str(value)
            if len(text) > self.MAX_SIZE:
                key = f"{key},cut"
                text = text[:self.MAX_SIZE]
            # str.replace returns a new string: the result has to be kept,
            # otherwise the newlines are never removed.
            text = text.replace('\n', ' ')
            rendered.append(f"({key}) {text}")
        return "; ".join(rendered)
def search_objects(event, name, attributes=()):
    """
    Search `event.objects` for objects named `name` which contain at least
    the attributes given.
    Return a lazy iterator over the matching objects.
    @ param attributes: an iterable of (object_relation, value); values are
                        compared through their str() form. When empty, every
                        object named `name` matches.
    """
    # Normalize once so the same list can be checked against every object.
    wanted = [(relation, str(value)) for relation, value in attributes]

    def matches(obj):
        # The name check must not depend on `attributes`: all() over an
        # empty sequence is True, which would let any object through.
        if obj.name != name:
            return False
        if not wanted:
            return True
        present = {
            (attr.object_relation, str(attr.value))
            for attr in obj.attributes
        }
        return all(pair in present for pair in wanted)

    return filter(matches, event.objects)
def find_process_by_pid(event, pid):
    """
    Look up the 'process' object of `event` whose 'pid' attribute is `pid`.
    When several objects match, the first one wins; when none does,
    None is returned.
    @ param pid: integer or str
    """
    wanted = (('pid', pid),)
    for process_obj in search_objects(event, "process", wanted):
        # only the first hit is of interest
        return process_obj
    return None
class CuckooParser:
    # `options` serves two purposes: it is the source of the userConfig
    # exposed to MISP, and it maps every option to the parser method that
    # implements it. Keeping both in one place avoids declaring the options
    # in userConfig and then again in a switch when running the parser.
    #
    # The order of the entries matters: references between
    # MISPObjects/MISPAttributes are created while the objects are being
    # generated, so an object that is referenced (A) has to exist before
    # the object that references it (B) is built.
    # TODO create references only after all parsing is done
    options = {
        "Sandbox info": {
            "method": lambda self: self.add_sandbox_info(),
            "userConfig": {
                "type": "Boolean",
                "message": "Add info related to the sandbox",
                "checked": "true",
            },
        },
        "Upload sample": {
            "method": lambda self: self.add_sample(),
            "userConfig": {
                "type": "Boolean",
                "message": "Upload the sample",
                "checked": "true",
            },
        },
        "Processes": {
            "method": lambda self: self.add_process_tree(),
            "userConfig": {
                "type": "Boolean",
                "message": "Add info related to the processes",
                "checked": "true",
            },
        },
        "DNS": {
            "method": lambda self: self.add_dns(),
            "userConfig": {
                "type": "Boolean",
                "message": "Add DNS queries/answers",
                "checked": "true",
            },
        },
        "TCP": {
            "method": lambda self: self.add_network("tcp"),
            "userConfig": {
                "type": "Boolean",
                "message": "Add TCP connections",
                "checked": "true",
            },
        },
        "UDP": {
            "method": lambda self: self.add_network("udp"),
            "userConfig": {
                "type": "Boolean",
                "message": "Add UDP connections",
                "checked": "true",
            },
        },
        "HTTP": {
            "method": lambda self: self.add_http(),
            "userConfig": {
                "type": "Boolean",
                "message": "Add HTTP requests",
                "checked": "true",
            },
        },
        "Signatures": {
            "method": lambda self: self.add_signatures(),
            "userConfig": {
                "type": "Boolean",
                "message": "Add Cuckoo's triggered signatures",
                "checked": "true",
            },
        },
        "Screenshots": {
            "method": lambda self: self.add_screenshots(),
            "userConfig": {
                "type": "Boolean",
                "message": "Upload the screenshots",
                "checked": "true",
            },
        },
        "Dropped files": {
            "method": lambda self: self.add_dropped_files(),
            "userConfig": {
                "type": "Boolean",
                "message": "Upload the dropped files",
                "checked": "true",
            },
        },
        "Dropped buffers": {
            "method": lambda self: self.add_dropped_buffers(),
            "userConfig": {
                "type": "Boolean",
                "message": "Upload the dropped buffers",
                "checked": "true",
            },
        },
    }
def __init__(self, config):
    """
    Prepare an empty parser.
    @ param config: dict mapping option names (the keys of `options`) to
                    a value accepted by int(), or to None to request the
                    default declared in the option's userConfig.
    """
    self.event = MISPEvent()
    self.files = None
    self.malware_binary = None
    self.report = None

    unknown = [key for key in config if key not in self.options]
    if unknown:
        # an unknown name has no method to run: ignore it here rather than
        # failing later, in the middle of the parsing
        log.warning(f"Ignoring unknown option(s): {unknown}")

    # Build the config following the order of `options`, not the order of
    # the received dict: parse() walks self.config in order, and the order
    # of `options` is the one which guarantees that referenced objects
    # already exist.
    self.config = {}
    for key, option in self.options.items():
        if key not in config:
            continue
        on = config[key]
        if on is None:
            # if an option is missing (we receive None as a value),
            # fall back to the default specified in the options
            on = option["userConfig"]["checked"] == 'true'
        self.config[key] = int(on)
def get_file(self, relative_filepath):
    """Return an io.BufferedIOBase for the corresponding relative_filepath
    in the Cuckoo archive. If not found (or if the archive entry holds no
    readable content), return an empty in-memory file to avoid fatal
    errors."""
    found = self.files.get(relative_filepath)
    if found is None:
        log.debug(f"Did not find file {relative_filepath}, "
                  f"returned an empty file instead")
        # An empty BytesIO reads as b"" like /dev/null would, without
        # opening an OS file handle on every call.
        return io.BytesIO()
    return found
def read_archive(self, archive_encoded):
    """Read the archive exported from Cuckoo and initialize the class.

    Fill `self.files` (archive member name -> readable file object) and
    `self.report` (parsed "reports/report.json").
    @ param archive_encoded: base 64 encoded content of a zip file or of a
                             tarball (any compression tarfile can detect)
    """
    # we extract the info about each file but do not retrieve
    # it automatically, as it may take too much space in memory
    buf_io = io.BytesIO(base64.b64decode(archive_encoded))
    is_zip = zipfile.is_zipfile(buf_io)
    buf_io.seek(0)  # don't forget this not to read an empty buffer
    self.files = {}
    if is_zip:
        # the archive was probably downloaded from the WebUI
        z = zipfile.ZipFile(buf_io, 'r')
        for info in z.filelist:
            mode = info.external_attr >> 16
            # only keep the regular files and dirs, we don't want any
            # symbolic link. Entries without any file type bits (for
            # instance written with ZipFile.writestr) cannot be links,
            # so they are kept as well.
            if stat.S_IFMT(mode) == 0 or stat.S_ISREG(mode) \
                    or stat.S_ISDIR(mode):
                self.files[info.filename] = z.open(info)
    else:
        # the archive was probably downloaded from the API;
        # 'r:*' lets tarfile detect the compression by itself
        f = tarfile.open(fileobj=buf_io, mode='r:*')
        for info in f.getmembers():
            # same filter as above: regular files and dirs only
            if info.isreg():
                self.files[info.name] = f.extractfile(info)
            elif info.isdir():
                # extractfile() gives None for a directory: store an
                # empty file so that every value can be read
                self.files[info.name] = io.BytesIO()

    # We want to keep the order of the keys of sub-dicts in the report,
    # eg. the signatures have marks with unknown keys such as
    # {'marks': [
    #     {"suspicious_features": "Connection to IP address",
    #      "suspicious_request": "OPTIONS http://85.20.18.18/doc"}
    # ]}
    # To render those marks properly, we can only hope the developers
    # thought about the order in which they put the keys, and keep this
    # order so that the signature makes sense to the reader.
    # We use PrettyDict, a customization of OrderedDict to do so.
    # It will be instantiated recursively when parsing the json (ie.
    # subdicts will also be PrettyDict instances)
    self.report = json.load(
        self.get_file("reports/report.json"),
        object_pairs_hook=PrettyDict,
    )
def read_malware(self):
    """Load the analysed sample (archive member "binary") into
    `self.malware_binary`; log a warning when it is empty or missing."""
    self.malware_binary = self.get_file("binary").read()
    if not self.malware_binary:
        # Logger.warn is a deprecated alias which newer Python versions
        # no longer provide; warning() is the supported spelling.
        log.warning("No malware binary found")
def add_sandbox_info(self):
    """Add a 'sandbox-report' object built from the 'info' section of the
    report (score, start time, duration and machine used).
    Return False when the report has no 'info' section."""
    info = self.report.get("info", {})
    if not info:
        log.warning("The 'info' field was not found in the report, skipping")
        return False

    sandbox_obj = MISPObject(name='sandbox-report')
    sandbox_obj.add_attribute('score', info['score'])
    sandbox_obj.add_attribute('sandbox-type', 'on-premise')
    sandbox_obj.add_attribute('on-premise-sandbox', 'cuckoo')
    # free-text summary of the run, space separated
    raw_report = " ".join((
        f'started on:{info["machine"]["started_on"]}',
        f'duration:{info["duration"]}s',
        f'vm:{info["machine"]["name"]}/{info["machine"]["label"]}',
    ))
    sandbox_obj.add_attribute('raw-report', raw_report)
    self.event.add_object(sandbox_obj)
def add_sample(self):
    """Add the target of the analysis: the submitted file (as binary
    objects) or the submitted URL (as a 'url' object).
    Return False when the report does not describe the target."""
    target = self.report.get("target", {})
    category = target.get("category", "")
    if not category:
        log.warning("Could not find info about the sample in the report, skipping")
        return False

    if category == "url":
        log.debug("Sample is a URL")
        url_obj = MISPObject(name='url')
        url_obj.add_attribute('url', target['url'])
        url_obj.add_attribute('text', "Submitted URL")
        self.event.add_object(url_obj)
    elif category == "file":
        log.debug("Sample is a file, uploading it")
        self.read_malware()
        sample = io.BytesIO(self.malware_binary)
        file_o, bin_type_o, bin_section_li = make_binary_objects(
            pseudofile=sample,
            filename=target["file"]["name"],
        )
        file_o.comment = "Submitted sample"
        # the sample was delivered, not dropped: fix the categories
        produced = (file_o, bin_type_o, *bin_section_li)
        for binary_obj in filter(None, produced):
            for attribute in binary_obj.attributes:
                if attribute.type in PAYLOAD_DELIVERY:
                    attribute.category = "Payload delivery"
            self.event.add_object(binary_obj)
def add_http(self):
    """Add the HTTP requests, one 'http-request' object per entry of
    report["network"]["http"].
    Return False when the report lists no HTTP request."""
    # `or {}`: the fallback must be a dict, since .get() is called on it
    # (a list fallback raises AttributeError when "network" is missing)
    network = self.report.get("network") or {}
    http = network.get("http", [])
    if not http:
        log.info("No HTTP connection found in the report, skipping")
        return False

    for request in http:
        o = MISPObject(name='http-request')
        o.add_attribute('host', request['host'])
        o.add_attribute('method', request['method'])
        o.add_attribute('uri', request['uri'])
        o.add_attribute('user-agent', request['user-agent'])
        o.add_attribute('text', f"count:{request['count']} "
                                f"port:{request['port']}")
        self.event.add_object(o)
def add_network(self, proto=None):
    """
    Add UDP/TCP traffic, one 'network-connection' object per distinct
    (source, source port, destination, destination port).
    Return False when the report lists no connection for `proto`.
    @ param proto: must be one of "tcp", "udp"
    """
    # `or {}`: the fallback must be a dict, since .get() is called on it
    # (a list fallback raises AttributeError when "network" is missing)
    network = self.report.get("network") or {}
    li_conn = network.get(proto, [])
    if not li_conn:
        log.info(f"No {proto} connection found in the report, skipping")
        return False

    seen = set()
    # sort by time to get the "first packet seen" right; sorted() works on
    # a copy, so the report itself is left untouched
    for conn in sorted(li_conn, key=lambda x: x["time"]):
        src = conn['src']
        dst = conn['dst']
        sport = conn['sport']
        dport = conn['dport']
        endpoints = (src, sport, dst, dport)
        if endpoints in seen:
            continue
        seen.add(endpoints)

        o = MISPObject(name='network-connection')
        o.add_attribute('ip-src', src)
        o.add_attribute('ip-dst', dst)
        o.add_attribute('src-port', sport)
        o.add_attribute('dst-port', dport)
        o.add_attribute('layer3-protocol', "IP")
        o.add_attribute('layer4-protocol', proto.upper())
        o.add_attribute('first-packet-seen', conn['time'])
        self.event.add_object(o)
def add_dns(self):
    """Add DNS records, one 'dns-record' object per entry of
    report["network"]["dns"]. Only A/AAAA answers are exported.
    Return False when the report lists no DNS request."""
    # `or {}`: the fallback must be a dict, since .get() is called on it
    # (a list fallback raises AttributeError when "network" is missing)
    network = self.report.get("network") or {}
    dns = network.get("dns", [])
    if not dns:
        log.info("No DNS connection found in the report, skipping")
        return False

    for record in dns:
        o = MISPObject(name='dns-record')
        o.add_attribute('text', f"request type:{record['type']}")
        o.add_attribute('queried-domain', record['request'])
        for answer in record.get("answers", []):
            if answer["type"] in ("A", "AAAA"):
                o.add_attribute('a-record', answer['data'])
            # TODO implement MX/NS
        self.event.add_object(o)
def _get_marks_str(self, marks):
    """
    Internal helper rendering the marks of a signature as strings.
    Marks of an unknown (or missing) type are skipped. The marks
    themselves are not modified.
    @ param marks: list of dicts, each with a 'type' key
    Return a list of str, in the order of the marks.
    """
    marks_strings = []
    for m in marks:
        m_type = m.get("type")
        if m_type == "generic":
            # render everything but the type, working on a copy so that
            # the order of the keys of the report is left untouched
            details = m.copy()
            details.pop("type", None)
            marks_strings.append(str(details))
        elif m_type == "ioc":
            marks_strings.append(m['ioc'])
        elif m_type == "call":
            call = m["call"]
            arguments = call.get("arguments", {})
            flags = call.get("flags", {})
            # one single f-string: applying the % operator on top of an
            # f-string breaks as soon as the API name contains a '%'
            marks_strings.append(
                f"Call API '{call['api']}' {arguments} {flags}"
            )
        else:
            log.debug(f"Unknown mark type '{m_type}', skipping")
        # TODO implemented marks 'config' and 'volatility'
    return marks_strings
def _add_ttp(self, attribute, ttp_short, ttp_num):
    """
    Internal helper tagging `attribute` with a MITRE ATT&CK galaxy
    cluster.
    @ params
        - attribute: MISPAttribute
        - ttp_short: short description of the TTP
                     (eg. "Credential Dumping")
        - ttp_num: formatted as "T"+int
                   (eg. T1003)
    """
    cluster = f'"{ttp_short} - {ttp_num}"'
    attribute.add_tag('misp-galaxy:mitre-attack-pattern=' + cluster)
def add_signatures(self):
    """Add the Cuckoo signatures, with as many details as possible
    regarding the marks. All the signatures go into one single
    'sb-signature' object, one 'signature' attribute each.
    Return False when the report lists no signature."""
    signatures = self.report.get("signatures", [])
    if not signatures:
        log.info("No signature found in the report")
        return False

    o = MISPObject(name='sb-signature')
    o.add_attribute('software', "Cuckoo")

    for sign in signatures:
        marks = sign["marks"]
        marks_strings = self._get_marks_str(marks)
        summary = sign['description']
        if marks_strings:
            summary += "\n---\n"

        # remove the duplicates but keep the marks in the order of the
        # report (a set would shuffle them from one run to another)
        marks_strings = list(dict.fromkeys(marks_strings))
        description = summary + "\n".join(marks_strings)

        a = MISPAttribute()
        a.from_dict(type='text', value=description)
        for ttp_num, desc in sign.get("ttp", {}).items():
            ttp_short = desc["short"]
            self._add_ttp(a, ttp_short, ttp_num)

        # this signature was triggered by the processes with the following
        # PIDs, we can create references
        # (falsy PIDs are dropped, redundancy is removed)
        triggered_by_pids = dict.fromkeys(
            pid for pid in (m.get("pid", None) for m in marks) if pid
        )
        for pid in triggered_by_pids:
            process_o = find_process_by_pid(self.event, pid)
            if process_o:
                process_o.add_reference(a, "triggers")

        o.add_attribute('signature', **a)

    self.event.add_object(o)
def _handle_process(self, proc, accu):
    """
    Internal recursive helper: build the 'process' object of `proc`,
    then do the same for each of its children.
    Every object built is appended to the `accu` list, parents before
    their children (DFS-like order). Each child object gets a 'child-of'
    reference to its parent object.
    Return the object built for `proc`.
    """
    process_obj = MISPObject(name='process')
    # registered before the children so that parents come first in accu
    accu.append(process_obj)
    for relation, key in (
        ('pid', 'pid'),
        ('command-line', 'command_line'),
        ('name', 'process_name'),
        ('parent-pid', 'ppid'),
    ):
        process_obj.add_attribute(relation, proc[key])

    for child in proc.get('children', []):
        process_obj.add_attribute('child-pid', child['pid'])
        # the recursive call returns the object it has just appended
        # to accu for this child
        child_obj = self._handle_process(child, accu)
        child_obj.add_reference(process_obj, 'child-of')

    return process_obj
def add_process_tree(self):
    """Add every process of report["behavior"]["processtree"] to the
    event, as separate 'process' objects.
    Return False when the report has no process tree."""
    tree = self.report.get("behavior", {}).get("processtree", [])
    if not tree:
        log.warning("No process tree found in the report, skipping")
        return False

    for root_process in tree:
        collected = []
        self._handle_process(root_process, collected)
        for process_obj in collected:
            self.event.add_object(process_obj)
def get_relpath(self, path):
    """
    Transform an absolute or relative path into a path relative to the
    correct cuckoo analysis directory, without knowing the cuckoo
    working directory.
    The result is the longest trailing part of `path` which is the name
    of a member of the archive; the archive does not need to contain
    entries for the intermediate directories.
    Return an empty string if the path given does not refer to a
    file from the analysis directory (or ends with a '/').
    """
    if path.endswith("/"):
        # no file name at the end of the path
        return ""
    components = [part for part in path.split("/") if part]
    # try the longest candidate first, then drop the leading directories
    # one after the other
    for start in range(len(components)):
        candidate = "/".join(components[start:])
        if candidate in self.files:
            return candidate
    return ""
def add_screenshots(self):
    """Add the screenshots taken by Cuckoo, as attachments of one
    'sandbox-report' object.
    Return False when the report lists no screenshot."""
    screenshots = self.report.get('screenshots', [])
    if not screenshots:
        log.info("No screenshot found in the report, skipping")
        return False

    report_obj = MISPObject(name='sandbox-report')
    report_obj.add_attribute('sandbox-type', 'on-premise')
    report_obj.add_attribute('on-premise-sandbox', "cuckoo")

    for shot in screenshots:
        # Cuckoo stores an absolute path, while the archive is indexed by
        # paths relative to the analysis folder.
        relative_path = self.get_relpath(shot['path'])
        raw_image = self.get_file(relative_path).read()
        # decode the base64 bytes to get a str (no b'' format)
        encoded_image = base64.b64encode(raw_image).decode('utf-8')
        report_obj.add_attribute(
            "sandbox-file",
            value=posixpath.basename(relative_path),
            data=encoded_image,
            type='attachment',
            category="External analysis",
        )

    self.event.add_object(report_obj)
def _get_dropped_objs(self, path, filename=None, comment=None):
    """
    Internal helper turning a dropped file/buffer into binary objects.
    @ params
        - path: relative to the cuckoo analysis directory
        - filename: if not specified, deduced from the path
        - comment: if specified, set as comment of the file object
    Return what make_binary_objects returns: (file object, binary type
    object, list of section objects).
    """
    name = filename if filename else posixpath.basename(path)
    content = io.BytesIO(self.get_file(path).read())

    # create ad hoc objects
    objects = make_binary_objects(pseudofile=content, filename=name)
    file_o, bin_type_o, bin_section_li = objects

    if comment:
        file_o.comment = comment

    # these come from a dropped artifact: fix the categories
    for binary_obj in filter(None, (file_o, bin_type_o, *bin_section_li)):
        for attribute in binary_obj.attributes:
            if attribute.type in ARTIFACTS_DROPPED:
                attribute.category = "Artifacts dropped"

    return file_o, bin_type_o, bin_section_li
def _add_yara(self, obj, yara_dict):
    """Internal helper adding one 'text' attribute to the MISPObject
    `obj` for each Yara match listed in `yara_dict`."""
    for match in yara_dict:
        rule_name = match.get("name", "")
        rule_description = match.get("meta", {}).get("description", "")
        text = "\n".join((
            "Yara match",
            f"(name) {rule_name}",
            f"(description) {rule_description}",
        ))
        obj.add_attribute("text", text, comment="Yara match")
def add_dropped_files(self):
    """Upload the dropped files as file objects, linked to the process
    objects (already in the event) which dropped them.
    Return False when the report lists no dropped file."""
    dropped = self.report.get("dropped", [])
    if not dropped:
        log.info("No dropped file found, skipping")
        return False

    for d in dropped:
        # Cuckoo logs three things that are of interest for us:
        # - 'name' which is not the original name of the file
        #   but is formatted as follow:
        #   8 first bytes of SHA256 + _ + original name in lower case
        # - 'filepath' which is the original filepath on the VM,
        #   where the file was dropped
        # - 'path' which is the local path of the stored file,
        #   in the cuckoo archive
        filename = d.get("name", "")
        original_path = d.get("filepath", "")
        sha256 = d.get("sha256", "")

        if original_path and sha256:
            log.debug(f"Will now try to restore original filename from "
                      f"path {original_path}")
            # cut at the FIRST underscore only: the original name may
            # contain underscores itself
            sha256_prefix, _, original_name = filename.partition("_")
            lowered_path = original_path.lower()
            # check our assumptions are valid, if so we can safely
            # restore the filename, if not the format may have changed
            # so we'll keep the filename of the report
            if sha256_prefix and original_name \
                    and sha256.startswith(sha256_prefix) \
                    and lowered_path.endswith(original_name) \
                    and filename not in lowered_path:
                # we can restore the original case of the filename:
                # it is the end of the original path
                filename = original_path[-len(original_name):]
                log.debug(f"Successfully restored original filename: "
                          f"{filename}")
            else:
                log.debug("Cannot restore filename: our assumptions were "
                          "wrong, filename format may have changed")

        if not filename:
            filename = "NO NAME FOUND IN THE REPORT"
            log.warning(f'No filename found for dropped file! '
                        f'Will use "{filename}"')

        # only the file object is added to the event
        file_o, _bin_type_o, _bin_section_li = self._get_dropped_objs(
            self.get_relpath(d['path']),
            filename=filename,
            comment="Dropped file"
        )

        self._add_yara(file_o, d.get("yara", []))

        file_o.add_attribute("fullpath", original_path,
                             category="Artifacts dropped")

        # why is this a list? for when various programs drop the same file?
        for pid in d.get("pids", []):
            # if we have an object for the process that dropped the file,
            # we can link the two (we just take the first result from
            # the search)
            process_o = find_process_by_pid(self.event, pid)
            if process_o:
                file_o.add_reference(process_o, "dropped-by")

        self.event.add_object(file_o)
def add_dropped_buffers(self):
    """Upload the dropped buffers as file objects, named "buffer <index>".
    Return False when the report lists no dropped buffer."""
    buffers = self.report.get("buffer", [])
    if not buffers:
        log.info("No dropped buffer found, skipping")
        return False

    index = 0
    for buf in buffers:
        # only the file object is added to the event
        file_o = self._get_dropped_objs(
            self.get_relpath(buf['path']),
            filename=f"buffer {index}",
            comment="Dropped buffer"
        )[0]
        self._add_yara(file_o, buf.get("yara", []))
        self.event.add_object(file_o)
        index += 1
def parse(self):
    """Run the method of every option enabled in `self.config`, following
    the order of `self.config`."""
    for option_name, enabled in self.config.items():
        if not enabled:
            continue
        run_option = self.options[option_name]["method"]
        run_option(self)
def get_misp_event(self):
    """Run the expansions of the event built so far, then return it."""
    log.debug("Running MISP expansions")
    event = self.event
    event.run_expansions()
    return event
def handler(q=False):
    """
    Entry point of the module: import the Cuckoo archive of a request.
    @ param q: JSON str with the keys 'data' (base 64 encoded archive)
               and 'config' (options of the parser), or False
    Return False when there is no request, otherwise a dict
    {'results': ...} holding the non-empty 'Attribute' and 'Object'
    parts of the generated event.
    """
    # In case there's no data
    if q is False:
        return False

    request = json.loads(q)
    archive = request['data']
    parser = CuckooParser(request['config'])
    parser.read_archive(archive)
    parser.parse()

    # go through JSON to get plain dicts/lists out of the event
    serialized = json.loads(parser.get_misp_event().to_json())
    results = {}
    for key in ('Attribute', 'Object'):
        if key in serialized and serialized[key]:
            results[key] = serialized[key]
    return {'results': results}
def introspection():
    """Return `mispattributes`, after storing under its 'userConfig' key
    the userConfig of every option of CuckooParser."""
    user_config = {}
    for option_name, option in CuckooParser.options.items():
        user_config[option_name] = option["userConfig"]
    mispattributes['userConfig'] = user_config
    return mispattributes
def version():
    """Return `moduleinfo`, after storing `moduleconfig` under its
    'config' key."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo