Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DataModel Plugin #2494

Open
wants to merge 63 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 57 commits
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
fa34c52
added data models
cristinaascari Jul 30, 2024
604738b
updated data models
cristinaascari Jul 31, 2024
1cf2e0a
updated data models
cristinaascari Jul 31, 2024
45b7f10
updated data models
cristinaascari Jul 31, 2024
a234319
updated data models
cristinaascari Aug 1, 2024
38c0c29
fix
cristinaascari Aug 1, 2024
3bc44c5
fix
cristinaascari Aug 2, 2024
29f4313
update data models
cristinaascari Aug 5, 2024
1913000
fix
cristinaascari Aug 5, 2024
90187fb
update file data model
cristinaascari Aug 5, 2024
b4462a0
update file data model
cristinaascari Aug 5, 2024
4140f2f
updates file data model
cristinaascari Aug 6, 2024
5ab5614
updates file data model
cristinaascari Aug 6, 2024
e82f98e
updates file data model
cristinaascari Aug 6, 2024
f12776d
updates
cristinaascari Aug 9, 2024
845406c
updates data models
cristinaascari Aug 9, 2024
6f91d84
fix
cristinaascari Aug 9, 2024
0a6529c
fix
cristinaascari Aug 9, 2024
210f008
fix CharFiled max_length
cristinaascari Aug 26, 2024
88376fc
fixes
cristinaascari Aug 26, 2024
858b200
fixes
cristinaascari Aug 26, 2024
7ebe465
Added BaseDataModel
cristinaascari Aug 26, 2024
3717b95
updated BaseDataModel
cristinaascari Aug 26, 2024
8e78524
updates data models
cristinaascari Aug 26, 2024
bdddb86
Merge branch 'develop' into datamodel_plugin
cristinaascari Aug 27, 2024
e6b289d
updates data models
cristinaascari Aug 27, 2024
beb3ff8
added admin data models
cristinaascari Aug 29, 2024
6261ec0
field names fixes
cristinaascari Aug 29, 2024
de6b938
fix ip data model
cristinaascari Aug 29, 2024
dca7a50
fix ip data model admin
cristinaascari Aug 29, 2024
db3f59a
fixes FileDataModel fields
cristinaascari Aug 30, 2024
b0119d6
Update external_references field
cristinaascari Aug 30, 2024
e0e64da
fix linters
cristinaascari Aug 30, 2024
a7beacb
fixes
cristinaascari Aug 30, 2024
0b934ff
updates signature field
cristinaascari Aug 30, 2024
dbbd637
updates tags field
cristinaascari Aug 30, 2024
f57abe3
fix
cristinaascari Aug 30, 2024
1018903
updates data model admin
cristinaascari Aug 30, 2024
3dab81c
fix
cristinaascari Aug 30, 2024
8c96d36
moved data_model into api_app
cristinaascari Sep 2, 2024
7c81328
moved data_model into api_app
cristinaascari Sep 2, 2024
48bfb83
Added unique_together constraint in IETFReport
cristinaascari Sep 2, 2024
c03102a
Merge branch 'develop' into datamodel_plugin
0ssigeno Sep 18, 2024
1b84446
More stuff
0ssigeno Sep 24, 2024
a1cba27
Stuff
0ssigeno Oct 7, 2024
f853d59
More fixes
0ssigeno Oct 14, 2024
c7fa248
More test and logs
0ssigeno Oct 14, 2024
0d85592
Blake
0ssigeno Oct 14, 2024
5dacc79
More
0ssigeno Oct 16, 2024
8e3a3e4
Mini rework
0ssigeno Oct 16, 2024
6914ecd
Blake
0ssigeno Oct 16, 2024
9aeebe3
Blake
0ssigeno Oct 16, 2024
f2384be
Fixes
0ssigeno Oct 16, 2024
84421b4
Merge branch 'develop' into datamodel_plugin
0ssigeno Oct 16, 2024
dba0c22
Fixes
0ssigeno Oct 16, 2024
89875f5
Fixes
0ssigeno Oct 16, 2024
4ad19ab
Technically we can have some mapping with post processing
0ssigeno Oct 16, 2024
bf17b04
bgp_ranking mapping
cristinaascari Oct 21, 2024
c2ef65f
more analyzer mappings
cristinaascari Oct 22, 2024
b175d72
fixes analyzer mappings
cristinaascari Oct 23, 2024
5cd8f3a
more mappings
cristinaascari Oct 23, 2024
fa5a40b
Tor mapping
cristinaascari Oct 23, 2024
a084c17
Fix retrieval
0ssigeno Oct 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions api_app/analyzers_manager/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,28 @@ class BaseAnalyzerMixin(Plugin, metaclass=ABCMeta):
ObservableTypes = ObservableTypes
TypeChoices = TypeChoices

def _do_create_data_model(self) -> bool:
return True

def _create_data_model_mtm(self):
return {}

def _update_data_model(self, data_model) -> None:
mtm = self._create_data_model_mtm()
for field_name, value in mtm.items():
field = getattr(data_model, field_name)
field.set(value)

def create_data_model(self):
    """Build, enrich and persist the data model of the current report.

    Returns:
        The saved data model, or ``None`` when ``_do_create_data_model``
        vetoes the creation or the report yields no data model.
    """
    self.report: AnalyzerReport
    if not self._do_create_data_model():
        return None
    data_model = self.report.create_data_model()
    if not data_model:
        return None
    # analyzer-specific enrichment happens before the save
    self._update_data_model(data_model)
    data_model.save()
    return data_model

@classmethod
@property
def config_exception(cls):
Expand Down Expand Up @@ -108,7 +130,11 @@ def after_run_success(self, content):
Args:
content (any): The content to process after a successful run.
"""
super().after_run_success(self._validate_result(content, max_recursion=15))
result = super().after_run_success(
self._validate_result(content, max_recursion=15)
)
self.create_data_model()
return result


class ObservableAnalyzer(BaseAnalyzerMixin, metaclass=ABCMeta):
Expand Down Expand Up @@ -326,7 +352,7 @@ def __polling(self, req_key: str, chance: int, re_poll_try: int = 0):
return self.__polling(req_key, chance, re_poll_try=re_poll_try + 1)
else:
status = json_data.get("status", None)
if status and status == self._job.Status.RUNNING.value:
if status and status == self._job.STATUSES.RUNNING.value:
logger.info(
f"Poll number #{chance + 1}, "
f"status: 'running' <-- {self.__repr__()}"
Expand Down
2 changes: 1 addition & 1 deletion api_app/analyzers_manager/file_analyzers/elf_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def run(self):
)
logger.warning(warning_message)
self.report.errors.append(warning_message)
self.report.status = self.report.Status.FAILED
self.report.status = self.report.STATUSES.FAILED
self.report.save()

return results
2 changes: 1 addition & 1 deletion api_app/analyzers_manager/file_analyzers/pe_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def run(self):
)
logger.warning(warning_message)
self.report.errors.append(warning_message)
self.report.status = self.report.Status.FAILED
self.report.status = self.report.STATUSES.FAILED
self.report.save()

return results
Expand Down
29 changes: 29 additions & 0 deletions api_app/analyzers_manager/file_analyzers/yara_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,3 +438,32 @@ def update(cls):
logger.info("Finished updating yara rules")
set_permissions(settings.YARA_RULES_PATH)
return True

def _create_data_model_mtm(self):
    """Create one ``Signature`` row per entry of the report.

    Each entry has its ``rule_url`` key popped (so the entry itself is
    mutated) and stored in the ``url`` column; the remainder of the
    entry is stored as the signature payload.

    Returns:
        dict: ``{"signatures": [<created Signature>, ...]}``.
    """
    from api_app.data_model_manager.models import Signature

    def _store(match):
        # pop before create: the url must not be duplicated in the payload
        rule_url = match.pop("rule_url", None)
        return Signature.objects.create(
            provider=Signature.PROVIDERS.YARA.value,
            signature=match,
            url=rule_url,
            score=1,
        )

    return {"signatures": [_store(match) for match in self.report.report]}

def _update_data_model(self, data_model):
    """Derive the evaluation from the number of attached signatures.

    After the parent hook has run, more than 20 signatures mark the
    model as malicious and more than 10 as suspicious; the model is then
    saved. Nothing is done for a falsy ``data_model``.
    """
    from api_app.data_model_manager.models import FileDataModel

    super()._update_data_model(data_model)
    if not data_model:
        return
    data_model: FileDataModel
    matched_rules = data_model.signatures.count()
    if 10 < matched_rules <= 20:
        data_model.evaluation = data_model.EVALUATIONS.SUSPICIOUS.value
    elif matched_rules > 20:
        data_model.evaluation = data_model.EVALUATIONS.MALICIOUS.value
    data_model.save()
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 4.2.15 on 2024-10-14 07:24

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``mapping_data_model`` JSON column to ``AnalyzerConfig``."""

    dependencies = [
        ("analyzers_manager", "0122_alter_soft_time_limit"),
    ]

    operations = [
        migrations.AddField(
            model_name="analyzerconfig",
            name="mapping_data_model",
            field=models.JSONField(
                default=dict,
                # Must match the help_text declared on
                # AnalyzerConfig.mapping_data_model: the mapping is read as
                # analyzer_report_key -> data_model_key, and a mismatch makes
                # `makemigrations` detect a pending change.
                help_text="Mapping analyzer_report_key: data_model_key. Keys preceded by the symbol $ will be considered as constants.",
            ),
        ),
    ]
58 changes: 58 additions & 0 deletions api_app/analyzers_manager/migrations/0124_data_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Generated by Django 4.2.15 on 2024-10-14 07:24

from django.db import migrations


def migrate_urlhaus(apps, schema_editor):
    """Store the report-key -> data-model-key mapping on the URLhaus config.

    Does nothing when no ``AnalyzerConfig`` named ``URLhaus`` exists.
    """
    analyzer_config_model = apps.get_model("analyzers_manager", "AnalyzerConfig")
    urlhaus_config = analyzer_config_model.objects.filter(name="URLhaus").first()
    if urlhaus_config:
        urlhaus_config.mapping_data_model = dict(
            [
                ("urlhaus_reference", "external_references"),
                # keys starting with "$" are constants, not report paths
                ("$Malicious", "evaluation"),
                ("urls.url", "related_threats"),
            ]
        )
        urlhaus_config.save()


def migrate_maxmind(apps, schema_editor):
    """Store the report-key -> data-model-key mapping on the MaxMindGeoIP config.

    The mapping is consumed as ``{analyzer_report_key: data_model_key}``
    (report paths are dotted), so the report path must be the key and the
    data model field the value.

    Does nothing when no ``AnalyzerConfig`` named ``MaxMindGeoIP`` exists.
    """
    AnalyzerConfig = apps.get_model("analyzers_manager", "AnalyzerConfig")
    ac = AnalyzerConfig.objects.filter(name="MaxMindGeoIP").first()
    if not ac:
        return
    ac.mapping_data_model = {
        "country.iso_code": "country_code",
        # NOTE(review): the report path is kept as originally written; the
        # GeoIP2 payload usually names this object "registered_country" -
        # confirm against the analyzer report.
        "registered_country_code.iso_code": "registered_country_code",
        "autonomous_system_number": "asn",
        "autonomous_system_organization": "isp",
    }
    ac.save()


def migrate_abuse_ipdb(apps, schema_editor):
    """Store the report-key -> data-model-key mapping on the AbuseIPDB config.

    The mapping is consumed as ``{analyzer_report_key: data_model_key}``
    (report paths are dotted), so the report path must be the key and the
    data model field the value.

    Does nothing when no ``AnalyzerConfig`` named ``AbuseIPDB`` exists.
    """
    AnalyzerConfig = apps.get_model("analyzers_manager", "AnalyzerConfig")
    ac = AnalyzerConfig.objects.filter(name="AbuseIPDB").first()
    if not ac:
        return
    ac.mapping_data_model = {
        "data.countryCode": "country_code",
        "permalink": "external_references",
        "data.hostnames": "resolutions",
        "data.isp": "isp",
        "categories_found": "tags",
    }
    ac.save()


class Migration(migrations.Migration):
    """Data migration seeding ``mapping_data_model`` for three analyzers.

    Every step is forward-only: the reverse operation is a no-op.
    """

    dependencies = [("analyzers_manager", "0123_analyzerconfig_mapping_data_model")]

    operations = [
        migrations.RunPython(forward, migrations.RunPython.noop)
        for forward in (migrate_maxmind, migrate_abuse_ipdb, migrate_urlhaus)
    ]
93 changes: 91 additions & 2 deletions api_app/analyzers_manager/models.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.

import json
from logging import getLogger
from typing import Optional
from typing import Dict, Optional, Type

from django.contrib.contenttypes.fields import GenericRelation
from django.contrib.postgres.fields import ArrayField
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models import ForeignKey

from api_app.analyzers_manager.constants import (
HashChoices,
Expand All @@ -16,6 +18,12 @@
from api_app.analyzers_manager.exceptions import AnalyzerConfigurationException
from api_app.analyzers_manager.queryset import AnalyzerReportQuerySet
from api_app.choices import TLP, PythonModuleBasePaths
from api_app.data_model_manager.models import (
BaseDataModel,
DomainDataModel,
FileDataModel,
IPDataModel,
)
from api_app.fields import ChoiceArrayField
from api_app.models import AbstractReport, PythonConfig, PythonModule

Expand All @@ -32,6 +40,83 @@ class Meta:
unique_together = [("config", "job")]
indexes = AbstractReport.Meta.indexes

@property
def data_model_class(self) -> Type[BaseDataModel]:
    """Data model class matching the job this report belongs to.

    Samples map to ``FileDataModel``; IP and domain observables map to
    ``IPDataModel`` and ``DomainDataModel`` respectively.

    Raises:
        NotImplementedError: for any other observable classification.
    """
    job = self.job
    if job.is_sample:
        return FileDataModel
    for observable_type, model_class in (
        (ObservableTypes.IP, IPDataModel),
        (ObservableTypes.DOMAIN, DomainDataModel),
    ):
        if job.observable_classification == observable_type.value:
            return model_class
    raise NotImplementedError(
        f"Unable to find data model for {self.job.observable_classification}"
    )

def _validation_before_data_model(self) -> bool:
    """Check whether a data model can be created from this report.

    Only successful reports are eligible. For eligible reports, every
    data model key of the configured mapping that is not a field of the
    target data model class is recorded in ``self.errors``; this does
    not change the returned value.

    Returns:
        bool: ``False`` when the report status is not SUCCESS, ``True``
        otherwise.
    """
    if self.status != self.STATUSES.SUCCESS.value:
        # log the job id (not the config pk) next to the word "job"
        logger.info(
            f"Skipping data model of {self.config.name} for job {self.job_id} because status"
            f" is {self.status}"
        )
        return False
    data_model_class = self.data_model_class
    data_model_keys = data_model_class.get_fields().keys()
    for data_model_key in self.config.mapping_data_model.values():
        if data_model_key not in data_model_keys:
            self.errors.append(
                f"Field {data_model_key} not present in {data_model_class.__name__}"
            )
    return True

def _create_data_model_dictionary(self) -> Dict:
    """Translate the report into keyword arguments for the data model.

    The configured mapping is read as ``{report_key: data_model_key}``.
    A ``report_key`` starting with ``$`` is used verbatim as the value;
    any other key is a dotted path resolved inside the report through
    ``self.get_value``. Problems found on a single entry are appended to
    ``self.errors`` and that entry is skipped.

    Target field handling:
        * ``ForeignKey``: the value must be a dict and is turned into a
          related object through ``get_or_create``.
        * ``ArrayField``: values of all the entries targeting the same
          field are accumulated in one list (a list is extended, a dict
          contributes its keys, anything else is appended).
        * any other field: the value is stored as is.

    Returns:
        Dict: data model field name -> value.
    """
    result = {}
    data_model_fields = self.data_model_class.get_fields()
    logger.info(f"Mapping is {json.dumps(self.config.mapping_data_model)}")
    for report_key, data_model_key in self.config.mapping_data_model.items():
        if data_model_key not in data_model_fields:
            # unknown targets are reported by _validation_before_data_model;
            # skip them here instead of failing with a KeyError
            logger.warning(f"Skipping unknown data model field {data_model_key}")
            continue
        field = data_model_fields[data_model_key]
        # this is a constant
        # NOTE(review): the leading "$" is kept in the stored value - confirm
        if report_key.startswith("$"):
            value = report_key
        # this is a field of the report
        else:
            try:
                value = self.get_value(self.report, report_key.split("."))
                logger.info(f"Retrieved {value} from key {report_key}")
            except Exception:
                # validation
                self.errors.append(f"Field {report_key} not present in report")
                continue
        # create the related object if necessary
        if isinstance(field, ForeignKey):
            # to create an object we need at least a dictionary
            if not isinstance(value, dict):
                self.errors.append(
                    f"Field {report_key} has type {type(value)} while a dictionary is expected"
                )
                continue
            value, _ = field.related_model.objects.get_or_create(**value)
            result[data_model_key] = value
        elif isinstance(field, ArrayField):
            # accumulate: several report keys may feed the same array
            accumulated = result.setdefault(data_model_key, [])
            if isinstance(value, list):
                accumulated.extend(value)
            elif isinstance(value, dict):
                accumulated.extend(list(value.keys()))
            else:
                accumulated.append(value)
        else:
            result[data_model_key] = value
    return result

def create_data_model(self) -> Optional[BaseDataModel]:
if not self._validation_before_data_model():
return None
dictionary = self._create_data_model_dictionary()
data_model = self.data_model_class.objects.create(
**dictionary, analyzer_report=self
)

return data_model


class MimeTypes(models.TextChoices):
# IMPORTANT! in case you update this Enum remember to update also the frontend
Expand Down Expand Up @@ -188,6 +273,10 @@ class AnalyzerConfig(PythonConfig):
orgs_configuration = GenericRelation(
"api_app.OrganizationPluginConfiguration", related_name="%(class)s"
)
mapping_data_model = models.JSONField(
default=dict,
help_text="Mapping analyzer_report_key: data_model_key. Keys preceded by the symbol $ will be considered as constants.",
)

@classmethod
@property
Expand Down
13 changes: 13 additions & 0 deletions api_app/analyzers_manager/observable_analyzers/abuseipdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import requests

from api_app.analyzers_manager.classes import ObservableAnalyzer
from api_app.analyzers_manager.models import AnalyzerReport
from tests.mock_utils import MockUpResponse, if_mock_connections, patch


Expand Down Expand Up @@ -93,3 +94,15 @@ def _monkeypatch(cls):
)
]
return super()._monkeypatch(patches=patches)

def _update_data_model(self, data_model) -> None:
    """Set the evaluation of the data model from the AbuseIPDB verdict.

    When the IP has at least one report, a whitelisted IP is evaluated
    as a false positive and any other IP as malicious. Without reports
    the evaluation is left untouched.

    The counters are read from the ``data`` object of the report, which
    is where the configured mapping reads the other AbuseIPDB fields
    (e.g. ``data.countryCode``); a report without a ``data`` object is
    read at top level, as before.
    """
    super()._update_data_model(data_model)
    self.report: AnalyzerReport
    payload = self.report.report
    details = payload.get("data")
    if not isinstance(details, dict):
        details = payload
    if not details.get("totalReports", 0):
        return
    evaluations = self.report.data_model_class.EVALUATIONS
    # a missing or null "isWhitelisted" counts as not whitelisted
    if details.get("isWhitelisted"):
        evaluation = evaluations.FALSE_POSITIVE.value
    else:
        evaluation = evaluations.MALICIOUS.value
    data_model.evaluation = evaluation
26 changes: 26 additions & 0 deletions api_app/analyzers_manager/observable_analyzers/maxmind.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,29 @@ def _monkeypatch(cls):
# completely skip because does not work without connection.
patches = [if_mock_connections(patch.object(cls, "run", return_value={}))]
return super()._monkeypatch(patches=patches)

def _update_data_model(self, data_model) -> None:
    """Set the evaluation from the autonomous system organization.

    The lower-cased ``autonomous_system_organization`` of the report is
    looked up in a fixed table; on an exact match the corresponding
    evaluation is stored on ``data_model``, otherwise nothing changes.
    """
    from api_app.analyzers_manager.models import AnalyzerReport

    super()._update_data_model(data_model)
    organization = self.report.report.get("autonomous_system_organization", None)
    if not organization:
        return
    self.report: AnalyzerReport
    # organization name -> name of the EVALUATIONS member to apply
    evaluation_by_organization = {
        "fastly": "CLEAN",
        "cloudflare": "CLEAN",
        "akamai": "CLEAN",
        "zscaler": "FALSE_POSITIVE",
        "palo alto networks": "FALSE_POSITIVE",
        "microdata service srl": "FALSE_POSITIVE",
        "forcepoint": "FALSE_POSITIVE",
        "stark industries": "SUSPICIOUS",
    }
    evaluation_name = evaluation_by_organization.get(organization.lower())
    if evaluation_name is None:
        return
    # resolved only on a match, as data_model_class may raise
    evaluations = self.report.data_model_class.EVALUATIONS
    data_model.evaluation = getattr(evaluations, evaluation_name).value
6 changes: 6 additions & 0 deletions api_app/analyzers_manager/observable_analyzers/urlhaus.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ def run(self):

return response.json()

def _do_create_data_model(self) -> bool:
    """Allow the data model only when URLhaus returned results.

    Returns:
        The parent's answer when it is falsy; otherwise whether the
        report's ``query_status`` differs from ``"no_results"`` (a
        missing status counts as ``"no_results"``).
    """
    parent_allows = super()._do_create_data_model()
    return (
        parent_allows
        and self.report.report.get("query_status", "no_results") != "no_results"
    )

@classmethod
def _monkeypatch(cls):
patches = [
Expand Down
Loading
Loading