Skip to content

Commit

Permalink
ignore non-platform nodes in capacity calculation (#915)
Browse files: browse the repository at this point in the history
  • Loading branch information
zubenkoivan authored May 21, 2024
1 parent f4487a9 commit e8d70d5
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 18 deletions.
10 changes: 3 additions & 7 deletions platform_monitoring/jobs_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,15 +277,11 @@ async def _get_available_resources_by_node_pool(
nodes = await self._kube_client.get_nodes(
label_selector=self._kube_node_pool_label
)
nodes_by_name = {node.metadata.name: node for node in nodes}
for node_name, node_pods in self._get_pods_by_node(pods).items():
for node in nodes:
if node.metadata.name == node_name:
break
else:
raise NodeNotFoundException(node_name)
node_pool_name = node.metadata.labels.get(self._kube_node_pool_label)
if not node_pool_name: # pragma: no coverage
if not (node := nodes_by_name.get(node_name)):
continue
node_pool_name = node.metadata.labels[self._kube_node_pool_label]
resource_requests = sum(
(pod.resource_requests for pod in node_pods), ContainerResources()
)
Expand Down
12 changes: 1 addition & 11 deletions tests/unit/test_jobs_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from neuro_sdk import Jobs as JobsClient

from platform_monitoring.container_runtime_client import ContainerRuntimeClientRegistry
from platform_monitoring.jobs_service import JobsService, NodeNotFoundException
from platform_monitoring.jobs_service import JobsService
from platform_monitoring.kube_client import KubeClient, Node, Pod


Expand Down Expand Up @@ -268,13 +268,3 @@ async def test_get_available_jobs_count_for_pods_without_nodes(
result = await service.get_available_jobs_counts()

assert result == {"cpu": 10, "nvidia-gpu": 2, "amd-gpu": 4, "cpu-p": 0}

async def test_get_available_jobs_count_node_not_found(
self, service: JobsService, kube_client: mock.Mock
) -> None:
kube_client.get_pods.side_effect = get_pods_factory(
create_pod("unknown", cpu_m=1000, memory=2**30)
)

with pytest.raises(NodeNotFoundException, match="Node 'unknown' was not found"):
await service.get_available_jobs_counts()

0 comments on commit e8d70d5

Please sign in to comment.