-
Notifications
You must be signed in to change notification settings - Fork 2
/
malware_data.py
60 lines (47 loc) · 1.98 KB
/
malware_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import openai
import json
import os
# Configure the openai client with the key read from the LLAMA_API_KEY
# environment variable; os.getenv returns None when the variable is unset,
# in which case no key is configured here.
# NOTE(review): the variable is named LLAMA_* but feeds the openai client —
# confirm this is the intended credential.
openai.api_key = os.getenv('LLAMA_API_KEY')
def _field_value(line: str) -> str:
    """Return the stripped text after the FIRST colon of a 'label: value' line.

    Raises:
        ValueError: if the line contains no colon at all.
    """
    _label, separator, value = line.partition(":")
    if not separator:
        raise ValueError(f"Missing ':' separator in line: {line!r}")
    return value.strip()


def _parse_event(response_text: str):
    """Turn one completion text into an event dict.

    Blank lines are ignored, and only the first colon of each line separates
    the label from its value, so values that themselves contain colons
    (e.g. a description like "Trojan detected: payload dropped") are kept
    whole.

    Returns:
        A dict with keys 'event_id', 'timestamp', 'severity', 'description'
        and 'label', or None when fewer than five non-blank lines are present.

    Raises:
        ValueError: if a line has no colon or the timestamp is not an integer.
    """
    lines = [ln.strip() for ln in response_text.splitlines() if ln.strip()]
    if len(lines) < 5:
        return None

    event_id, hour_text, severity, description, is_unusual = (
        _field_value(ln) for ln in lines[:5]
    )
    # Tolerate clock-style answers such as "14:30" by keeping only the hour.
    timestamp = int(hour_text.partition(":")[0])

    return {
        'event_id': event_id,
        'timestamp': timestamp,
        'severity': severity,
        'description': description,
        # Anything other than a (case-insensitive) "yes" is labelled 0.
        'label': 1 if is_unusual.lower() == "yes" else 0,
    }


def generate_synthetic_data_via_api(num_samples: int = 100) -> None:
    """Request synthetic malware events from the completion API and save them.

    Makes up to ``num_samples`` completion requests, parses each reply into an
    event record, and writes every successfully parsed record to
    ``malware_data.json`` in the current working directory. Replies that are
    incomplete or cannot be parsed are reported on stdout and skipped, so the
    file may contain fewer than ``num_samples`` records.

    Args:
        num_samples: Number of completion requests to attempt.
    """
    # The prompt never changes, so build it once instead of per iteration.
    prompt = (
        "Generate a synthetic malware event with the following details in this format:\n"
        "1. Event ID: <a unique identifier>\n"
        "2. Timestamp: <hour between 0 and 23>\n"
        "3. Severity: <Low, Medium, High>\n"
        "4. Description: <Brief description of the event>\n"
        "5. Is the event unusual?: <yes or no>"
    )

    synthetic_data = []
    for _ in range(num_samples):
        try:
            # NOTE(review): `openai.Completion.create(engine=...)` is the
            # interface this file was written against — confirm the installed
            # openai package and the model name are still available.
            response = openai.Completion.create(
                engine="text-davinci-003",
                prompt=prompt,
                max_tokens=100,
                temperature=0.7
            )
            response_text = response.choices[0].text.strip()
            event = _parse_event(response_text)
        except Exception as e:
            # Deliberate per-sample boundary: one failed request or malformed
            # reply must not abort the whole generation run.
            print(f"Error processing response: {str(e)}")
            continue

        if event is None:
            print(f"Incomplete response, skipping: {response_text}")
            continue
        synthetic_data.append(event)

    with open('malware_data.json', 'w', encoding='utf-8') as f:
        json.dump(synthetic_data, f, indent=4)
    # Report what was actually written, not what was requested.
    print(f"{len(synthetic_data)} samples generated and saved to malware_data.json")
if __name__ == "__main__":
    # Script entry point: generate the default batch of 100 samples.
    generate_synthetic_data_via_api(100)