From f77800384a7381b28568faf474e57d394345d1a2 Mon Sep 17 00:00:00 2001
From: Matthias Veit <aquamatthias@users.noreply.github.com>
Date: Mon, 27 Nov 2023 18:05:24 +0100
Subject: [PATCH] [feat] Report Summary: add resource counts to summaries
 (#185)

---
 fixbackend/inventory/inventory_service.py     | 78 +++++++++++++------
 fixbackend/inventory/schemas.py               |  5 ++
 .../inventory/inventory_service_test.py       | 40 +++++++---
 3 files changed, 88 insertions(+), 35 deletions(-)

diff --git a/fixbackend/inventory/inventory_service.py b/fixbackend/inventory/inventory_service.py
index 6be53dcd..672b29b7 100644
--- a/fixbackend/inventory/inventory_service.py
+++ b/fixbackend/inventory/inventory_service.py
@@ -57,7 +57,7 @@
 # alias names for better readability
 BenchmarkById = Dict[str, BenchmarkSummary]
 ChecksByBenchmarkId = Dict[str, List[Dict[str, str]]]  # benchmark_id -> [{id: check_id, severity: medium}, ...]
-ChecksByAccountId = Dict[str, Set[str]]
+ChecksByAccountId = Dict[str, Dict[str, int]]  # account_id -> check_id -> count
 SeverityByCheckId = Dict[str, str]
 T = TypeVar("T")
 V = TypeVar("V")
@@ -216,16 +216,23 @@ async def issues_since(
 
         async def account_summary() -> Dict[str, AccountSummary]:
             return {
-                entry["reported"]["id"]: AccountSummary(
-                    id=entry["reported"]["id"],
-                    name=entry["reported"]["name"],
-                    cloud=entry["ancestors"]["cloud"]["reported"]["name"],
+                entry["group"]["account_id"]: AccountSummary(
+                    id=entry["group"]["account_id"],
+                    name=entry["group"]["account_name"],
+                    cloud=entry["group"]["cloud_name"],
+                    resource_count=entry["count"],
+                )
+                async for entry in await self.client.aggregate(
+                    db,
+                    "search /ancestors.account.reported.id!=null | aggregate "
+                    "/ancestors.account.reported.id as account_id, "
+                    "/ancestors.account.reported.name as account_name, "
+                    "/ancestors.cloud.reported.name as cloud_name: sum(1) as count",
                 )
-                async for entry in await self.client.search_list(db, "is (account)")
             }
 
         async def check_summary() -> Tuple[ChecksByAccountId, SeverityByCheckId]:
-            check_accounts: ChecksByAccountId = defaultdict(set)
+            check_accounts: ChecksByAccountId = defaultdict(dict)
             check_severity: Dict[str, str] = {}
 
             async for entry in await self.client.aggregate(
@@ -234,12 +241,13 @@ async def check_summary() -> Tuple[ChecksByAccountId, SeverityByCheckId]:
                 "/security.issues[].check as check_id,"
                 "/security.issues[].severity as severity,"
                 "/ancestors.account.reported.id as account_id"
-                ": sum(1)",
+                ": sum(1) as count",
             ):
                 group = entry["group"]
+                count = entry["count"]
                 check_id = group["check_id"]
                 if isinstance(account_id := group["account_id"], str):
-                    check_accounts[check_id].add(account_id)
+                    check_accounts[check_id][account_id] = count
                 check_severity[check_id] = group["severity"]
             return check_accounts, check_severity
 
@@ -298,29 +306,36 @@ def overall_score(accounts: Dict[str, AccountSummary]) -> int:
 
             # combine benchmark and account data
             account_counter: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
-            severity_counter: Dict[str, int] = defaultdict(int)
-            account_sum_count: Dict[str, int] = defaultdict(int)
+            severity_check_counter: Dict[str, int] = defaultdict(int)
+            severity_resource_counter: Dict[str, int] = defaultdict(int)
+            account_check_sum_count: Dict[str, int] = defaultdict(int)
             failed_checks_by_severity: Dict[str, Set[str]] = defaultdict(set)
             available_checks = 0
             for bid, bench in benchmarks.items():
-                benchmark_counter: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
+                benchmark_account_issue_counter: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
+                benchmark_account_resource_counter: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
                 benchmark_severity_count: Dict[str, int] = defaultdict(int)
                 for check_info in checks.get(bid, []):
                     check_id = check_info["id"]
                     benchmark_severity_count[check_info["severity"]] += 1
                     available_checks += 1
                     if severity := severity_by_check_id.get(check_id):
-                        severity_counter[severity] += 1
-                        for account_id in failed_accounts_by_check_id.get(check_id, []):
-                            benchmark_counter[account_id][severity] += 1
+                        severity_check_counter[severity] += 1
+                        for account_id, failed_resource_count in failed_accounts_by_check_id[check_id].items():
+                            benchmark_account_issue_counter[account_id][severity] += 1
+                            benchmark_account_resource_counter[account_id][severity] += failed_resource_count
+                            severity_resource_counter[severity] += failed_resource_count
                             account_counter[account_id][severity] += 1
-                            account_sum_count[severity] += 1
+                            account_check_sum_count[severity] += 1
                             failed_checks_by_severity[severity].add(check_id)
                 for account_id, account in accounts.items():
                     if account.cloud in bench.clouds:
-                        failing = benchmark_counter.get(account_id)
+                        failing = benchmark_account_issue_counter.get(account_id)
+                        failed_resources = benchmark_account_resource_counter.get(account_id)
                         bench.account_summary[account_id] = BenchmarkAccountSummary(
-                            score=bench_account_score(failing or {}, benchmark_severity_count), failed_checks=failing
+                            score=bench_account_score(failing or {}, benchmark_severity_count),
+                            failed_checks=failing,
+                            failed_resources=failed_resources,
                         )
 
             # compute a score for every account by averaging the scores of all benchmark results
@@ -333,16 +348,26 @@ def overall_score(accounts: Dict[str, AccountSummary]) -> int:
             # get issues for the top 5 issue_ids
             tops = await top_issues(failed_checks_by_severity, num=5)
 
+            # sort top changed accounts by score
+            vulnerable_changed.accounts_selection.sort(key=lambda x: accounts[x].score if x in accounts else 100)
+            compliant_changed.accounts_selection.sort(key=lambda x: accounts[x].score if x in accounts else 100)
+
             return ReportSummary(
                 check_summary=CheckSummary(
                     available_checks=available_checks,
-                    failed_checks=sum(v for v in severity_counter.values()),
-                    failed_checks_by_severity=severity_counter,
+                    failed_checks=sum(v for v in severity_check_counter.values()),
+                    failed_checks_by_severity=severity_check_counter,
+                    available_resources=sum(v.resource_count for v in accounts.values()),
+                    failed_resources=sum(v for v in severity_resource_counter.values()),
+                    failed_resources_by_severity=severity_resource_counter,
                 ),
                 account_check_summary=CheckSummary(
                     available_checks=available_checks * len(accounts),
-                    failed_checks=sum(v for v in account_sum_count.values()),
-                    failed_checks_by_severity=account_sum_count,
+                    failed_checks=sum(v for v in account_check_sum_count.values()),
+                    failed_checks_by_severity=account_check_sum_count,
+                    available_resources=sum(v.resource_count for v in accounts.values()),
+                    failed_resources=sum(v for v in severity_resource_counter.values()),
+                    failed_resources_by_severity=severity_resource_counter,
                 ),
                 overall_score=overall_score(accounts),
                 accounts=sorted(list(accounts.values()), key=lambda x: x.score),
@@ -354,7 +379,14 @@ def overall_score(accounts: Dict[str, AccountSummary]) -> int:
 
         except GraphDatabaseNotAvailable:
             log.warning("Graph database not available yet. Returning empty summary.")
-            empty = CheckSummary(available_checks=0, failed_checks=0, failed_checks_by_severity={})
+            empty = CheckSummary(
+                available_checks=0,
+                failed_checks=0,
+                failed_checks_by_severity={},
+                available_resources=0,
+                failed_resources=0,
+                failed_resources_by_severity={},
+            )
             return ReportSummary(
                 check_summary=empty,
                 account_check_summary=empty,
diff --git a/fixbackend/inventory/schemas.py b/fixbackend/inventory/schemas.py
index b8c8eb2d..b63a19ec 100644
--- a/fixbackend/inventory/schemas.py
+++ b/fixbackend/inventory/schemas.py
@@ -23,12 +23,14 @@ class AccountSummary(BaseModel):
     id: str = Field(description="The account id")
     name: str = Field(description="The account name")
     cloud: str = Field(description="The name of the cloud provider")
+    resource_count: int = Field(description="The number of resources in the account")
     score: int = Field(description="The score of the account", default=100)
 
 
 class BenchmarkAccountSummary(BaseModel):
     score: int = Field(description="The score of the account", default=0)
     failed_checks: Optional[Dict[str, int]] = Field(description="The number of failed checks by severity.")
+    failed_resources: Optional[Dict[str, int]] = Field(description="The number of failed resources by severity.")
 
 
 class BenchmarkSummary(BaseModel):
@@ -48,6 +50,9 @@ class CheckSummary(BaseModel):
     available_checks: int = Field(description="The number of all available checks.")
     failed_checks: int = Field(description="The number of failed checks.")
     failed_checks_by_severity: Dict[str, int] = Field(description="The number of failed checks by severity.")
+    available_resources: int = Field(description="The number of all available resources.")
+    failed_resources: int = Field(description="The number of failed resources.")
+    failed_resources_by_severity: Dict[str, int] = Field(description="The number of failed resources by severity.")
 
 
 class VulnerabilitiesChanged(BaseModel):
diff --git a/tests/fixbackend/inventory/inventory_service_test.py b/tests/fixbackend/inventory/inventory_service_test.py
index f3427b4a..27bf41db 100644
--- a/tests/fixbackend/inventory/inventory_service_test.py
+++ b/tests/fixbackend/inventory/inventory_service_test.py
@@ -92,15 +92,15 @@ async def mock(request: Request) -> Response:
                 [{"clouds": ["aws"], "description": "Test AWS", "framework": "CIS", "id": "aws_test", "report_checks": [{"id": "aws_c1", "severity": "high"}, {"id": "aws_c2", "severity": "critical"}], "title": "AWS Test", "version": "0.1"},  # fmt: skip
                  {"clouds": ["gcp"], "description": "Test GCP", "framework": "CIS", "id": "gcp_test", "report_checks": [{"id": "gcp_c1", "severity": "low"}, {"id": "gcp_c2", "severity": "medium"}], "title": "GCP Test", "version": "0.2"}]  # fmt: skip
             )
-        elif request.url.path == "/graph/resoto/search/list" and content == "is (account)":
+        elif request.url.path == "/graph/resoto/search/aggregate" and content.startswith("search /ancestors.account.reported.id!=null"):  # fmt: skip
             return nd_json_response(
-                [{"id": "n1", "type": "node", "reported": {"id": "234", "name": "account 1"}, "ancestors": {"cloud": {"reported": {"name": "gcp", "id": "gcp"}}}},  # fmt: skip
-                 {"id": "n2", "type": "node", "reported": {"id": "123", "name": "account 2"}, "ancestors": {"cloud": {"reported": {"name": "aws", "id": "aws"}}}}]  # fmt: skip
+                [{"group": {"account_id": "123", "account_name": "account 2", "cloud_name": "aws"}, "count": 54321},  # fmt: skip
+                 {"group": {"account_id": "234", "account_name": "account 1", "cloud_name": "gcp"}, "count": 12345}]  # fmt: skip
             )
-        elif request.url.path == "/graph/resoto/search/aggregate":
+        elif request.url.path == "/graph/resoto/search/aggregate" and content.startswith("search /security.has_issues==true"):  # fmt: skip
             return nd_json_response(
-                [{"group": {"check_id": "aws_c1", "severity": "low", "account_id": "123", "account_name": "t1", "cloud": "aws"}, "sum_of_1": 8},  # fmt: skip
-                 {"group": {"check_id": "gcp_c2", "severity": "critical", "account_id": "234", "account_name": "t2", "cloud": "gcp"}, "sum_of_1": 2}]  # fmt: skip
+                [{"group": {"check_id": "aws_c1", "severity": "low", "account_id": "123", "account_name": "t1", "cloud": "aws"}, "count": 8},  # fmt: skip
+                 {"group": {"check_id": "gcp_c2", "severity": "critical", "account_id": "234", "account_name": "t2", "cloud": "gcp"}, "count": 2}]  # fmt: skip
             )
         elif request.url.path == "/graph/resoto/node/some_node_id":
             return json_response(azure_virtual_machine_resource_json)
@@ -128,18 +128,26 @@ async def test_summary(inventory_service: InventoryService, mocked_answers: Requ
     assert summary.check_summary.available_checks == 4
     assert summary.check_summary.failed_checks == 2
     assert summary.check_summary.failed_checks_by_severity == {"critical": 1, "low": 1}
+    assert summary.check_summary.failed_resources == 10
+    assert summary.check_summary.failed_resources_by_severity == {"critical": 2, "low": 8}
     # account checks summary
     assert summary.account_check_summary.available_checks == 8
     assert summary.account_check_summary.failed_checks == 2
     assert summary.account_check_summary.failed_checks_by_severity == {"critical": 1, "low": 1}
+    assert summary.account_check_summary.failed_resources == 10
+    assert summary.account_check_summary.failed_resources_by_severity == {"critical": 2, "low": 8}
     # check benchmarks
     b1, b2 = summary.benchmarks
     assert b1.id == "aws_test"
     assert b1.clouds == ["aws"]
-    assert b1.account_summary == {"123": BenchmarkAccountSummary(score=85, failed_checks={"low": 1})}
+    assert b1.account_summary == {
+        "123": BenchmarkAccountSummary(score=85, failed_checks={"low": 1}, failed_resources={"low": 8})
+    }
     assert b2.id == "gcp_test"
     assert b2.clouds == ["gcp"]
-    assert b2.account_summary == {"234": BenchmarkAccountSummary(score=0, failed_checks={"critical": 1})}
+    assert b2.account_summary == {
+        "234": BenchmarkAccountSummary(score=0, failed_checks={"critical": 1}, failed_resources={"critical": 2})
+    }
     assert len(summary.accounts) == 2
     # check accounts
     gcp, aws = summary.accounts
@@ -152,10 +160,10 @@ async def test_summary(inventory_service: InventoryService, mocked_answers: Requ
     assert aws.cloud == "aws"
     assert aws.score == 85
     # check becoming vulnerable
-    assert summary.changed_vulnerable.accounts_selection == ["123", "234"]
+    assert summary.changed_vulnerable.accounts_selection == ["234", "123"]
     assert summary.changed_vulnerable.resource_count_by_severity == {"critical": 1, "medium": 87}
     assert summary.changed_vulnerable.resource_count_by_kind_selection == {"aws_instance": 87, "gcp_disk": 1}
-    assert summary.changed_compliant.accounts_selection == ["123", "234"]
+    assert summary.changed_compliant.accounts_selection == ["234", "123"]
     assert summary.changed_compliant.resource_count_by_severity == {"critical": 1, "medium": 87}
     assert summary.changed_compliant.resource_count_by_kind_selection == {"aws_instance": 87, "gcp_disk": 1}
     # top checks
@@ -171,9 +179,17 @@ async def app(_: Request) -> Response:
     async_client = AsyncClient(transport=MockTransport(app))
     async with InventoryClient("http://localhost:8980", client=async_client) as client:
         async with InventoryService(client, graph_database_access_manager, domain_event_subscriber) as service:
+            empty = CheckSummary(
+                available_checks=0,
+                failed_checks=0,
+                failed_checks_by_severity={},
+                available_resources=0,
+                failed_resources=0,
+                failed_resources_by_severity={},
+            )
             assert await service.summary(db) == ReportSummary(
-                check_summary=CheckSummary(available_checks=0, failed_checks=0, failed_checks_by_severity={}),
-                account_check_summary=CheckSummary(available_checks=0, failed_checks=0, failed_checks_by_severity={}),
+                check_summary=empty,
+                account_check_summary=empty,
                 overall_score=0,
                 accounts=[],
                 benchmarks=[],