This repository has been archived by the owner on Apr 26, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
Release sockets without caching #131
Merged
+64
−135
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -110,8 +110,6 @@ | |
import time | ||
from contextlib import contextmanager | ||
from datetime import datetime | ||
from functools import lru_cache | ||
from threading import Lock | ||
from typing import TYPE_CHECKING, Any, Dict, Generator, Optional, Tuple, Union | ||
|
||
import anyio.abc | ||
|
@@ -127,7 +125,6 @@ | |
from prefect.server.schemas.responses import DeploymentResponse | ||
from prefect.utilities.asyncutils import run_sync_in_worker_thread | ||
from prefect.utilities.dockerutils import get_prefect_image_name | ||
from prefect.utilities.hashing import hash_objects | ||
from prefect.utilities.importtools import lazy_import | ||
from prefect.utilities.pydantic import JsonPatch | ||
from prefect.utilities.templating import find_placeholders | ||
|
@@ -138,7 +135,6 @@ | |
BaseWorkerResult, | ||
) | ||
from pydantic import VERSION as PYDANTIC_VERSION | ||
from pydantic import BaseModel | ||
|
||
if PYDANTIC_VERSION.startswith("2."): | ||
from pydantic.v1 import Field, validator | ||
|
@@ -173,64 +169,6 @@ | |
RETRY_MAX_DELAY_JITTER_SECONDS = 3 | ||
|
||
|
||
_LOCK = Lock() | ||
|
||
|
||
class HashableKubernetesClusterConfig(BaseModel): | ||
""" | ||
A hashable version of the KubernetesClusterConfig class. | ||
Used for caching. | ||
""" | ||
|
||
config: dict = Field( | ||
default=..., description="The entire contents of a kubectl config file." | ||
) | ||
context_name: str = Field( | ||
default=..., description="The name of the kubectl context to use." | ||
) | ||
|
||
def __hash__(self): | ||
"""Make the config hashable.""" | ||
return hash( | ||
( | ||
hash_objects(self.config), | ||
self.context_name, | ||
) | ||
) | ||
|
||
|
||
@lru_cache(maxsize=8, typed=True) | ||
def _get_configured_kubernetes_client_cached( | ||
cluster_config: Optional[HashableKubernetesClusterConfig] = None, | ||
) -> Any: | ||
"""Returns a configured Kubernetes client.""" | ||
with _LOCK: | ||
# if a hard-coded cluster config is provided, use it | ||
if cluster_config: | ||
client = kubernetes.config.new_client_from_config_dict( | ||
config_dict=cluster_config.config, | ||
context=cluster_config.context_name, | ||
) | ||
else: | ||
# If no hard-coded config specified, try to load Kubernetes configuration | ||
# within a cluster. If that doesn't work, try to load the configuration | ||
# from the local environment, allowing any further ConfigExceptions to | ||
# bubble up. | ||
try: | ||
kubernetes.config.load_incluster_config() | ||
config = kubernetes.client.Configuration.get_default_copy() | ||
client = kubernetes.client.ApiClient(configuration=config) | ||
except kubernetes.config.ConfigException: | ||
client = kubernetes.config.new_client_from_config() | ||
|
||
if os.environ.get( | ||
"PREFECT_KUBERNETES_WORKER_ADD_TCP_KEEPALIVE", "TRUE" | ||
).strip().lower() in ("true", "1"): | ||
enable_socket_keep_alive(client) | ||
|
||
return client | ||
|
||
|
||
def _get_default_job_manifest_template() -> Dict[str, Any]: | ||
"""Returns the default job manifest template used by the Kubernetes worker.""" | ||
return { | ||
|
@@ -710,62 +648,80 @@ def _stop_job( | |
grace_seconds: int = 30, | ||
): | ||
"""Removes the given Job from the Kubernetes cluster""" | ||
client = self._get_configured_kubernetes_client(configuration) | ||
job_cluster_uid, job_namespace, job_name = self._parse_infrastructure_pid( | ||
infrastructure_pid | ||
) | ||
|
||
if job_namespace != configuration.namespace: | ||
raise InfrastructureNotAvailable( | ||
f"Unable to kill job {job_name!r}: The job is running in namespace " | ||
f"{job_namespace!r} but this worker expected jobs to be running in " | ||
f"namespace {configuration.namespace!r} based on the work pool and " | ||
"deployment configuration." | ||
with self._get_configured_kubernetes_client(configuration) as client: | ||
job_cluster_uid, job_namespace, job_name = self._parse_infrastructure_pid( | ||
infrastructure_pid | ||
) | ||
|
||
current_cluster_uid = self._get_cluster_uid(client) | ||
if job_cluster_uid != current_cluster_uid: | ||
raise InfrastructureNotAvailable( | ||
f"Unable to kill job {job_name!r}: The job is running on another " | ||
"cluster than the one specified by the infrastructure PID." | ||
) | ||
if job_namespace != configuration.namespace: | ||
raise InfrastructureNotAvailable( | ||
f"Unable to kill job {job_name!r}: The job is running in namespace " | ||
f"{job_namespace!r} but this worker expected jobs to be running in " | ||
f"namespace {configuration.namespace!r} based on the work pool and " | ||
"deployment configuration." | ||
) | ||
|
||
with self._get_batch_client(client) as batch_client: | ||
try: | ||
batch_client.delete_namespaced_job( | ||
name=job_name, | ||
namespace=job_namespace, | ||
grace_period_seconds=grace_seconds, | ||
# Foreground propagation deletes dependent objects before deleting | ||
# owner objects. This ensures that the pods are cleaned up before | ||
# the job is marked as deleted. | ||
# See: https://kubernetes.io/docs/concepts/architecture/garbage-collection/#foreground-deletion # noqa | ||
propagation_policy="Foreground", | ||
current_cluster_uid = self._get_cluster_uid(client) | ||
if job_cluster_uid != current_cluster_uid: | ||
raise InfrastructureNotAvailable( | ||
f"Unable to kill job {job_name!r}: The job is running on another " | ||
"cluster than the one specified by the infrastructure PID." | ||
) | ||
except kubernetes.client.exceptions.ApiException as exc: | ||
if exc.status == 404: | ||
raise InfrastructureNotFound( | ||
f"Unable to kill job {job_name!r}: The job was not found." | ||
) from exc | ||
else: | ||
raise | ||
|
||
with self._get_batch_client(client) as batch_client: | ||
try: | ||
batch_client.delete_namespaced_job( | ||
name=job_name, | ||
namespace=job_namespace, | ||
grace_period_seconds=grace_seconds, | ||
# Foreground propagation deletes dependent objects before deleting # noqa | ||
# owner objects. This ensures that the pods are cleaned up before # noqa | ||
# the job is marked as deleted. | ||
# See: https://kubernetes.io/docs/concepts/architecture/garbage-collection/#foreground-deletion # noqa | ||
propagation_policy="Foreground", | ||
) | ||
except kubernetes.client.exceptions.ApiException as exc: | ||
if exc.status == 404: | ||
raise InfrastructureNotFound( | ||
f"Unable to kill job {job_name!r}: The job was not found." | ||
) from exc | ||
else: | ||
raise | ||
|
||
@contextmanager | ||
def _get_configured_kubernetes_client( | ||
self, configuration: KubernetesWorkerJobConfiguration | ||
) -> "ApiClient": | ||
) -> Generator["ApiClient", None, None]: | ||
""" | ||
Returns a configured Kubernetes client. | ||
""" | ||
|
||
cluster_config = None | ||
|
||
if configuration.cluster_config: | ||
cluster_config = HashableKubernetesClusterConfig( | ||
config=configuration.cluster_config.config, | ||
context_name=configuration.cluster_config.context_name, | ||
) | ||
|
||
return _get_configured_kubernetes_client_cached(cluster_config) | ||
try: | ||
if configuration.cluster_config: | ||
client = kubernetes.config.new_client_from_config_dict( | ||
config_dict=configuration.cluster_config.config, | ||
context=configuration.cluster_config.context_name, | ||
) | ||
else: | ||
# If no hardcoded config specified, try to load Kubernetes configuration | ||
# within a cluster. If that doesn't work, try to load the configuration | ||
# from the local environment, allowing any further ConfigExceptions to | ||
# bubble up. | ||
try: | ||
kubernetes.config.load_incluster_config() | ||
config = kubernetes.client.Configuration.get_default_copy() | ||
client = kubernetes.client.ApiClient(configuration=config) | ||
except kubernetes.config.ConfigException: | ||
client = kubernetes.config.new_client_from_config() | ||
|
||
if os.environ.get( | ||
"PREFECT_KUBERNETES_WORKER_ADD_TCP_KEEPALIVE", "TRUE" | ||
).strip().lower() in ("true", "1"): | ||
enable_socket_keep_alive(client) | ||
|
||
yield client | ||
finally: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See above comment. |
||
client.rest_client.pool_manager.clear() | ||
|
||
def _replace_api_key_with_secret( | ||
self, configuration: KubernetesWorkerJobConfiguration, client: "ApiClient" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This makes `_get_configured_kubernetes_client` work like all the other k8s client getters, where we call `client.rest_client.pool_manager.clear()` on exit, which releases sockets.