Skip to content

Commit

Permalink
Merge pull request #1139 from SEKOIA-IO/fix/tehtris_remove_duplicates
Browse files Browse the repository at this point in the history
Fix: Tehtris remove duplicates from result (257)
  • Loading branch information
vg-svitla authored Oct 24, 2024
2 parents cc7a19d + fda2ea6 commit 924f8da
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 7 deletions.
6 changes: 6 additions & 0 deletions Tehtris/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

## 2024-10-24 - 1.15.2

### Fixed

- Remove duplicates from results when fetching new events from the API

## 2024-05-30 - 1.15.1

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion Tehtris/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"name": "TEHTRIS",
"uuid": "1528d749-d353-4e38-ab1b-6e01d7595569",
"slug": "tehtris",
"version": "1.15.1",
"version": "1.15.2",
"categories": [
"Endpoint"
]
Expand Down
16 changes: 13 additions & 3 deletions Tehtris/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Tehtris/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ python = ">=3.10,<3.12"
sekoia-automation-sdk = "^1.13.0"
orjson = "^3.7.7"
python-dateutil = "^2.8.2"
cachetools = "^5.4.0"

[tool.poetry.dev-dependencies]
pytest = "*"
Expand Down
2 changes: 1 addition & 1 deletion Tehtris/tehtris_modules/metrics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from prometheus_client import Counter, Histogram, Gauge
from prometheus_client import Counter, Gauge, Histogram

# Declare google prometheus metrics
prom_namespace_tehtris = "symphony_module_tehtris"
Expand Down
25 changes: 23 additions & 2 deletions Tehtris/tehtris_modules/trigger_tehtris_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
from collections.abc import Generator
from datetime import datetime, timedelta, timezone
from functools import cached_property
from typing import Any

import orjson
from cachetools import Cache, LRUCache
from dateutil.parser import isoparse
from sekoia_automation.connector import Connector, DefaultConnectorConfiguration

Expand Down Expand Up @@ -31,6 +33,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.from_date = datetime.now(timezone.utc) - timedelta(minutes=1)
self.fetch_events_limit = 100
self.events_cache: Cache = LRUCache(maxsize=1000) # TODO: is it enough to have 1000 event ids in cache?

@cached_property
def client(self):
Expand Down Expand Up @@ -77,15 +80,33 @@ def __fetch_next_events(self, from_date: datetime, offset: int):
)
return events

def _remove_duplicates(self, events: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""
Remove duplicates events from the fetched events and update the cache with new ids.
Args:
events: list[dict[str, Any]]
Returns:
list[dict[str, Any]]:
"""
result = [event for event in events if event["uid"] not in self.events_cache]
self.events_cache.update({event["uid"]: None for event in result})

return result

def fetch_events(self) -> Generator[list, None, None]:
has_more_message = True
most_recent_date_seen = self.from_date
offset = 0

while has_more_message:
# fetch events from the current context
next_events = self.__fetch_next_events(self.from_date, offset)
INCOMING_MESSAGES.labels(intake_key=self.configuration.intake_key).inc(len(next_events))
fetched_events = self.__fetch_next_events(self.from_date, offset)
INCOMING_MESSAGES.labels(intake_key=self.configuration.intake_key).inc(len(fetched_events))

# remove duplicates events from previous fetch
next_events = self._remove_duplicates(fetched_events)

# if the number of fetched events equals the limit, additional events are remaining
has_more_message = len(next_events) == self.fetch_events_limit
Expand Down
86 changes: 86 additions & 0 deletions Tehtris/tests/test_tehtris_event_trigger.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime, timezone
from typing import Any
from unittest.mock import MagicMock, patch

import pytest
Expand Down Expand Up @@ -37,6 +38,40 @@ def trigger(symphony_storage, patch_datetime_now):
yield trigger


def message(event_id: int) -> dict[str, Any]:
    """
    Build a sample TEHTRIS XDR event payload for tests.

    Both ``id`` and the trailing part of ``uid`` are derived from
    *event_id*, so distinct ids yield distinct events while every other
    field stays fixed.
    """
    # flake8: noqa
    return {
        "rflId": 1,
        "time": "2022-10-19T12:00:00.163407+00:00",
        "lvl": 5,
        "module": "das",
        "eventName": "HeuristicAlert",
        "ipSrc": "1.2.3.4",
        "ipDst": "5.6.7.8",
        "egKBId": 110000031301810,
        "description": "Suspect spawn tree detected\n─ (Example\\doe-j) C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe (24644)\n── (Example\\doe-j) C:\\Windows\\System32\\cmd.exe (24876)\n\nNo remediation taken",
        "os_release__": "11",
        "pid": 24876,
        "domain__": "example.org",
        "os_version__": "10.0.22621",
        "cmdline": 'C:\\WINDOWS\\system32\\cmd.exe /d /c "C:\\Users\\doe-j\\AppData\\Local\\Programs\\IT Hit\\IT Hit Edit Doc Opener Host 5\\NativeHost.exe" chrome-extension://mdfaonmaoigngflemfmkboffllkopopm/ --parent-window=0 < \\\\.\\pipe\\LOCAL\\edge.nativeMessaging.in.c7c2f388b0eb2f77 > \\\\.\\pipe\\LOCAL\\edge.nativeMessaging.out.c7c2f388b0eb2f77',
        "username": "Example\\doe-j",
        "pCreateDatetime": "2022-10-19T12:00:00.098346+00:00",
        "location": "",
        "os_server__": False,
        "sha256": "01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b",
        "ppid": 24644,
        "uuid__": "3be682e9-5568-4dbf-8e2d-5b36159945da",
        "path": "C:\\Windows\\System32\\cmd.exe",
        "tag": "YBE_PDT_WIN",
        "uid": f"{event_id};windows;HOST01;example.org",
        "os__": "windows",
        "os_architecture__": "x86_64",
        "hostname__": "HOST01",
        "id": event_id,
    }


@pytest.fixture
def message1():
# flake8: noqa
Expand Down Expand Up @@ -120,6 +155,57 @@ def test_fetch_events(trigger, message1, message2):
assert next(trigger.fetch_events()) == [message1, message2]


def test_fetch_events_without_duplicates(trigger, message1, message2):
    """Events already seen in a previous fetch must not be forwarded again."""
    with requests_mock.Mocker() as mock:
        batch_one = [message(event_id) for event_id in (1, 2, 3)]
        batch_two = [message(event_id) for event_id in (2, 3, 4, 5)]

        mock.get(
            "https://abc.api.tehtris.net/api/xdr/v1/event",
            status_code=200,
            json=batch_one,
        )

        # First poll: every event is new, so all three come back.
        first_result = next(trigger.fetch_events())
        assert [event["id"] for event in first_result] == [1, 2, 3]
        assert [event["uid"] for event in first_result] == [
            "1;windows;HOST01;example.org",
            "2;windows;HOST01;example.org",
            "3;windows;HOST01;example.org",
        ]

        # The uids of the forwarded events are now recorded in the cache.
        assert trigger.events_cache == {
            "1;windows;HOST01;example.org": None,
            "2;windows;HOST01;example.org": None,
            "3;windows;HOST01;example.org": None,
        }

        # Second poll overlaps the first on events 2 and 3.
        mock.get(
            "https://abc.api.tehtris.net/api/xdr/v1/event",
            status_code=200,
            json=batch_two,
        )

        second_result = next(trigger.fetch_events())

        # Only the genuinely new events (4 and 5) survive deduplication.
        assert [event["id"] for event in second_result] == [4, 5]
        assert [event["uid"] for event in second_result] == [
            "4;windows;HOST01;example.org",
            "5;windows;HOST01;example.org",
        ]
        assert len(second_result) == 2


def test_fetch_events_pagination(trigger, message1, message2):
first_batch = [message1] * 100
second_batch = [message2] * 25
Expand Down

0 comments on commit 924f8da

Please sign in to comment.