From 51145454c767390aafb0d6d801093bdbcd6d81a0 Mon Sep 17 00:00:00 2001
From: Sawyer
Date: Mon, 4 Nov 2024 13:31:02 -0500
Subject: [PATCH] Add Kubernetes autoscaler with configuration and state
 management

---
 k8s-autoscaler/k8s_autoscaler/api/__init__.py |   0
 k8s-autoscaler/k8s_autoscaler/api/routes.py   | 135 ++++++++++
 k8s-autoscaler/k8s_autoscaler/config.py       |  47 ++++
 k8s-autoscaler/k8s_autoscaler/dependencies.py |  26 ++
 k8s-autoscaler/k8s_autoscaler/kubernetes.py   |  71 +++++
 k8s-autoscaler/k8s_autoscaler/main.py         | 254 +++---------------
 k8s-autoscaler/k8s_autoscaler/types.py        |  24 ++
 k8s-autoscaler/k8s_autoscaler/vllm.py         |  73 +++++
 k8s-autoscaler/poetry.lock                    |  36 ++-
 k8s-autoscaler/pyproject.toml                 |   1 +
 10 files changed, 443 insertions(+), 224 deletions(-)
 create mode 100644 k8s-autoscaler/k8s_autoscaler/api/__init__.py
 create mode 100644 k8s-autoscaler/k8s_autoscaler/api/routes.py
 create mode 100644 k8s-autoscaler/k8s_autoscaler/config.py
 create mode 100644 k8s-autoscaler/k8s_autoscaler/dependencies.py
 create mode 100644 k8s-autoscaler/k8s_autoscaler/kubernetes.py
 create mode 100644 k8s-autoscaler/k8s_autoscaler/types.py
 create mode 100644 k8s-autoscaler/k8s_autoscaler/vllm.py

diff --git a/k8s-autoscaler/k8s_autoscaler/api/__init__.py b/k8s-autoscaler/k8s_autoscaler/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/k8s-autoscaler/k8s_autoscaler/api/routes.py b/k8s-autoscaler/k8s_autoscaler/api/routes.py
new file mode 100644
index 0000000..4f7437a
--- /dev/null
+++ b/k8s-autoscaler/k8s_autoscaler/api/routes.py
@@ -0,0 +1,135 @@
+from fastapi import (
+    APIRouter,
+    Request,
+    Response,
+    BackgroundTasks,
+    HTTPException,
+    status,
+    Depends,
+)
+from fastapi.responses import StreamingResponse, JSONResponse
+import httpx
+import logging
+import time
+from typing import AsyncGenerator
+from ..types import AutoscalerState, PodPhase
+from ..config import Settings
+from ..kubernetes import KubeCommand
+from ..vllm import VLLMManager
+from ..dependencies import get_settings, get_state, get_kube, get_vllm_manager
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+
+async def stream_response(response: httpx.Response) -> AsyncGenerator[bytes, None]:
+    """Stream response content."""
+    try:
+        async for chunk in response.aiter_bytes():
+            yield chunk
+    except httpx.HTTPError as e:
+        logger.error(f"Error streaming response: {e}")
+        raise HTTPException(status_code=502, detail="Error streaming from vLLM service")
+
+
+@router.get("/health")
+async def health_check(
+    kube: KubeCommand = Depends(get_kube), state: AutoscalerState = Depends(get_state)
+):
+    """Health check endpoint."""
+    phase = await kube.get_pod_phase()
+    current_replicas, desired_replicas = await kube.get_replicas()
+    return {
+        "status": "healthy",
+        "vllm_status": phase,
+        "vllm_running": phase == PodPhase.RUNNING,
+        "current_replicas": current_replicas,
+        "desired_replicas": desired_replicas,
+        "last_activity": time.strftime(
+            "%Y-%m-%d %H:%M:%S", time.localtime(state.last_activity)
+        ),
+    }
+
+
+@router.post("/scale/{replicas}")
+async def scale(
+    replicas: int,
+    background_tasks: BackgroundTasks,
+    kube: KubeCommand = Depends(get_kube),
+    vllm_manager: VLLMManager = Depends(get_vllm_manager),
+) -> JSONResponse:
+    """Manually scale the vLLM deployment."""
+    if replicas < 0:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Replica count must be non-negative",
+        )
+
+    if not await kube.scale_deployment(replicas):
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to scale deployment",
+        )
+
+    if replicas > 0:
+        vllm_manager.reset_inactivity_timer(background_tasks)
+
+    return JSONResponse(
+        status_code=status.HTTP_200_OK,
+        content={"message": f"Scaling deployment to {replicas} replicas"},
+    )
+
+
+@router.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def proxy_request(
+    request: Request,
+    path: str,
+    background_tasks: BackgroundTasks,
+    settings: Settings = Depends(get_settings),
+    state: AutoscalerState = Depends(get_state),
+    vllm_manager: VLLMManager = Depends(get_vllm_manager),
+) -> StreamingResponse:
+    """Proxy requests to vLLM service, handling activation as needed."""
+    try:
+        if not await vllm_manager.ensure_running():
+            raise HTTPException(
+                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+                detail=f"vLLM service activation failed after {settings.activation_timeout}s",
+            )
+
+        vllm_manager.reset_inactivity_timer(background_tasks)
+
+        if not state.http_client:
+            raise HTTPException(
+                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+                detail="HTTP client not initialized",
+            )
+
+        # Forward the request to vLLM
+        url = f"{settings.vllm_url_base}/{path}"
+        headers = dict(request.headers)
+        headers.pop("host", None)  # Remove host header to avoid conflicts
+
+        vllm_response = await state.http_client.request(
+            method=request.method,
+            url=url,
+            headers=headers,
+            content=await request.body(),
+            params=request.query_params,
+        )
+
+        return StreamingResponse(
+            stream_response(vllm_response),
+            status_code=vllm_response.status_code,
+            headers=dict(vllm_response.headers),
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error processing request: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Internal server error",
+        )
diff --git a/k8s-autoscaler/k8s_autoscaler/config.py b/k8s-autoscaler/k8s_autoscaler/config.py
new file mode 100644
index 0000000..4097765
--- /dev/null
+++ b/k8s-autoscaler/k8s_autoscaler/config.py
@@ -0,0 +1,47 @@
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from pydantic import Field
+from functools import cached_property
+
+
+class Settings(BaseSettings):
+    """Application settings with validation and documentation."""
+
+    model_config = SettingsConfigDict(
+        env_file=".env", env_file_encoding="utf-8", extra="ignore"
+    )
+
+    vllm_service_host: str = Field(
+        default="vllm-svc", description="Hostname of the vLLM service"
+    )
+    vllm_service_port: str = Field(
+        default="8000", description="Port of the vLLM service"
+    )
+    vllm_deployment: str = Field(
+        default="vllm", description="Name of the vLLM deployment"
+    )
+    kubernetes_namespace: str = Field(
+        default="default", description="Kubernetes namespace for the vLLM deployment"
+    )
+    inactivity_timeout: int = Field(
+        default=900,
+        description="Timeout in seconds before scaling down due to inactivity",
+        gt=0,
+    )
+    activation_timeout: int = Field(
+        default=120,
+        description="Timeout in seconds while waiting for vLLM to become ready",
+        gt=0,
+    )
+    proxy_timeout: float = Field(
+        default=30.0, description="Timeout in seconds for proxy requests", gt=0
+    )
+
+    @cached_property
+    def vllm_url_base(self) -> str:
+        """Base URL for the vLLM service."""
+        return f"http://{self.vllm_service_host}:{self.vllm_service_port}"
+
+    @cached_property
+    def kubectl_base_cmd(self) -> str:
+        """Base kubectl command with namespace."""
+        return f"kubectl -n {self.kubernetes_namespace}"
diff --git a/k8s-autoscaler/k8s_autoscaler/dependencies.py b/k8s-autoscaler/k8s_autoscaler/dependencies.py
new file mode 100644
index 0000000..360e09c
--- /dev/null
+++ b/k8s-autoscaler/k8s_autoscaler/dependencies.py
@@ -0,0 +1,26 @@
+# k8s_autoscaler/dependencies.py
+from fastapi import Depends, Request
+from .config import Settings
+from .types import AutoscalerState
+from .kubernetes import KubeCommand
+from .vllm import VLLMManager
+
+
+def get_settings(request: Request) -> Settings:
+    return request.app.state.settings
+
+
+def get_state(request: Request) -> AutoscalerState:
+    return request.app.state.state
+
+
+def get_kube(settings: Settings = Depends(get_settings)) -> KubeCommand:
+    return KubeCommand(settings)
+
+
+def get_vllm_manager(
+    settings: Settings = Depends(get_settings),
+    state: AutoscalerState = Depends(get_state),
+    kube: KubeCommand = Depends(get_kube),
+) -> VLLMManager:
+    return VLLMManager(settings, state, kube)
diff --git a/k8s-autoscaler/k8s_autoscaler/kubernetes.py b/k8s-autoscaler/k8s_autoscaler/kubernetes.py
new file mode 100644
index 0000000..07cdec3
--- /dev/null
+++ b/k8s-autoscaler/k8s_autoscaler/kubernetes.py
@@ -0,0 +1,71 @@
+import asyncio
+import subprocess
+import logging
+from .types import PodPhase
+from .config import Settings
+
+logger = logging.getLogger(__name__)
+
+
+class KubeCommand:
+    """Kubectl command builder and executor."""
+
+    def __init__(self, settings: Settings):
+        self.settings = settings
+
+    async def execute(self, cmd: str) -> tuple[bool, str]:
+        """Execute a kubectl command and return success status and output."""
+        full_cmd = f"{self.settings.kubectl_base_cmd} {cmd}"
+        try:
+            process = await asyncio.create_subprocess_shell(
+                full_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            )
+            stdout, stderr = await process.communicate()
+
+            success = process.returncode == 0
+            output = stdout.decode().strip() if success else stderr.decode().strip()
+            if not success:
+                logger.error(f"kubectl command failed: {output}")
+            return success, output
+        except Exception as e:
+            logger.error(f"kubectl command failed: {e}")
+            return False, str(e)
+
+    async def get_pod_phase(self) -> PodPhase:
+        """Get the phase of the vLLM pod."""
+        success, output = await self.execute(
+            "get pods -l app=vllm -o jsonpath='{.items[0].status.phase}'"
+        )
+        try:
+            return PodPhase(output) if success and output else PodPhase.UNKNOWN
+        except ValueError:
+            logger.warning(f"Unknown pod phase: {output}")
+            return PodPhase.UNKNOWN
+
+    async def scale_deployment(self, replicas: int) -> bool:
+        """Scale vLLM deployment to specified replicas."""
+        if replicas < 0:
+            logger.error(f"Invalid replica count: {replicas}")
+            return False
+
+        success, output = await self.execute(
+            f"scale deployment {self.settings.vllm_deployment} --replicas={replicas}"
+        )
+        if success:
+            logger.info(f"Successfully scaled deployment to {replicas} replicas")
+        return success
+
+    async def get_replicas(self) -> tuple[int, int]:
+        """Get current and desired replica counts."""
+        cmd = (
+            f"get deployment {self.settings.vllm_deployment} "
+            "-o jsonpath='{.status.replicas} {.spec.replicas}'"
+        )
+        success, output = await self.execute(cmd)
+        if success and output:
+            try:
+                current, desired = map(int, output.split())
+                return current, desired
+            except ValueError:
+                logger.error(f"Failed to parse replica counts: {output}")
+        return -1, -1
diff --git a/k8s-autoscaler/k8s_autoscaler/main.py b/k8s-autoscaler/k8s_autoscaler/main.py
index 34d9f3a..6ae12af 100644
--- a/k8s-autoscaler/k8s_autoscaler/main.py
+++ b/k8s-autoscaler/k8s_autoscaler/main.py
@@ -1,15 +1,11 @@
-from fastapi import FastAPI, Response, BackgroundTasks, HTTPException
-from fastapi.responses import StreamingResponse
+from contextlib import asynccontextmanager
+from fastapi import FastAPI
 import httpx
-import asyncio
-import subprocess
-from asyncio.subprocess import Process
 import logging
-import time
-import os
-from typing import Optional, TypedDict, Literal, AsyncGenerator, cast
-from dataclasses import dataclass
-from contextlib import asynccontextmanager
+from .config import Settings
+from .types import AutoscalerState
+from .api import routes
+import asyncio
 
 # Configure logging
 logging.basicConfig(
@@ -17,227 +13,39 @@
 )
 logger = logging.getLogger(__name__)
 
-# Type definitions
-PodPhase = Literal["Pending", "Running", "Succeeded", "Failed", "Unknown"]
-
-
-class CommandResult(TypedDict):
-    success: bool
-    output: str
-    error: str
-
-
-@dataclass
-class Config:
-    vllm_service_host: str
-    vllm_service_port: str
-    vllm_deployment: str
-    kubernetes_namespace: str
-    inactivity_timeout: int
-    activation_timeout: int
-
-
-# Load configuration from environment
-config = Config(
-    vllm_service_host=os.getenv("VLLM_SERVICE_HOST", "vllm-svc"),
-    vllm_service_port=os.getenv("VLLM_SERVICE_PORT", "8000"),
-    vllm_deployment=os.getenv("VLLM_DEPLOYMENT_NAME", "vllm"),
-    kubernetes_namespace=os.getenv("KUBERNETES_NAMESPACE", "default"),
-    inactivity_timeout=int(os.getenv("INACTIVITY_TIMEOUT", "900")),
-    activation_timeout=int(os.getenv("ACTIVATION_TIMEOUT", "120")),
-)
-
-
-# Global state
-class AutoscalerState:
-    def __init__(self):
-        self.last_activity: float = time.time()
-        self.shutdown_task: Optional[asyncio.Task] = None
-        self.http_client: Optional[httpx.AsyncClient] = None
-
-
-state = AutoscalerState()
-
-
-async def execute_command(cmd: str) -> CommandResult:
-    """Execute a shell command and return structured result."""
-    try:
-        process: Process = await asyncio.create_subprocess_shell(
-            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
-        )
-        stdout, stderr = await process.communicate()
-
-        return CommandResult(
-            success=process.returncode == 0,
-            output=stdout.decode().strip(),
-            error=stderr.decode().strip(),
-        )
-    except Exception as e:
-        logger.error(f"Failed to execute command: {str(e)}")
-        return CommandResult(success=False, output="", error=str(e))
-
-
-async def check_vllm_status() -> tuple[bool, PodPhase]:
-    """Check if VLLM deployment is running and ready."""
-    cmd = f"kubectl get pods -n {config.kubernetes_namespace} -l app=vllm -o jsonpath='{{.items[0].status.phase}}'"
-    result = await execute_command(cmd)
-
-    if not result["success"]:
-        return False, "Unknown"
-
-    phase = result["output"]
-    if not phase:
-        return False, "Unknown"
-
-    return phase == "Running", cast(PodPhase, phase)
-
-
-async def scale_vllm(replicas: int) -> bool:
-    """Scale VLLM deployment to specified number of replicas."""
-    cmd = f"kubectl scale deployment -n {config.kubernetes_namespace} {config.vllm_deployment} --replicas={replicas}"
-    result = await execute_command(cmd)
-
-    if not result["success"]:
-        logger.error(f"Failed to scale VLLM: {result['error']}")
-
-    return result["success"]
-
-
-async def wait_for_vllm_ready() -> bool:
-    """Wait for VLLM to become ready within timeout period."""
-    start_time = time.time()
-    while time.time() - start_time < config.activation_timeout:
-        is_running, phase = await check_vllm_status()
-        if is_running:
-            return True
-        logger.info(f"Waiting for VLLM to be ready. Current phase: {phase}")
Current phase: {phase}") - await asyncio.sleep(2) - return False - - -async def monitor_inactivity(): - """Monitor for inactivity and scale down when timeout is reached.""" - try: - while True: - await asyncio.sleep(60) # Check every minute - if time.time() - state.last_activity > config.inactivity_timeout: - logger.info( - f"Inactivity timeout of {config.inactivity_timeout}s reached, scaling down VLLM" - ) - if await scale_vllm(0): - logger.info("VLLM scaled down successfully") - else: - logger.error("Failed to scale down VLLM") - break - except Exception as e: - logger.error(f"Error in inactivity monitor: {str(e)}") - finally: - state.shutdown_task = None - - -def reset_inactivity_timer(background_tasks: BackgroundTasks): - """Reset the inactivity timer and start monitoring if needed.""" - state.last_activity = time.time() - - if state.shutdown_task is None: - state.shutdown_task = asyncio.create_task(monitor_inactivity()) - background_tasks.add_task(lambda: state.shutdown_task) - @asynccontextmanager -async def get_http_client(): - """Get or create HTTP client.""" - if state.http_client is None: - state.http_client = httpx.AsyncClient(timeout=30.0) - try: - yield state.http_client - finally: - pass # Keep client alive for reuse - - -async def stream_response(response: httpx.Response) -> AsyncGenerator[bytes, None]: - """Stream response content.""" - async for chunk in response.aiter_bytes(): - yield chunk - - -app = FastAPI(title="VLLM Autoscaler") - +async def lifespan(app: FastAPI): + # Startup + settings = Settings() + state = AutoscalerState() + state.http_client = httpx.AsyncClient(timeout=settings.proxy_timeout) -@app.on_event("startup") -async def startup_event(): - """Initialize HTTP client on startup.""" - state.http_client = httpx.AsyncClient(timeout=30.0) + # Store in app state for access in dependencies + app.state.settings = settings + app.state.state = state + yield -@app.on_event("shutdown") -async def shutdown_event(): - """Clean up resources on shutdown.""" + # Shutdown if state.http_client: await state.http_client.aclose() + if state.shutdown_task: + state.shutdown_task.cancel() + try: + await state.shutdown_task + except asyncio.CancelledError: + pass + + +app = FastAPI( + title="vLLM Autoscaler", + description="Autoscaler and proxy for vLLM deployments in Kubernetes", + version="1.0.0", + lifespan=lifespan, +) - -@app.get("/health") -async def health_check(): - """Health check endpoint.""" - is_running, phase = await check_vllm_status() - return {"status": "healthy", "vllm_status": phase, "vllm_running": is_running} - - -@app.get("/{path:path}") -async def proxy_request( - path: str, - response: Response, - background_tasks: BackgroundTasks, - raw_query_string: str = "", -) -> StreamingResponse: - """Proxy requests to VLLM service, handling activation as needed.""" - try: - # Check if VLLM is running - is_running, phase = await check_vllm_status() - if not is_running: - logger.info( - f"VLLM not running (phase: {phase}), starting activation sequence" - ) - - # Scale up VLLM - if not await scale_vllm(1): - raise HTTPException( - status_code=503, detail="Failed to activate VLLM service" - ) - - # Wait for VLLM to become ready - if not await wait_for_vllm_ready(): - raise HTTPException( - status_code=503, - detail=f"VLLM service activation timeout after {config.activation_timeout}s", - ) - - logger.info("VLLM activation completed successfully") - - # Reset inactivity timer - reset_inactivity_timer(background_tasks) - - # Proxy the request to VLLM - query = 
f"?{raw_query_string}" if raw_query_string else "" - vllm_url = f"http://{config.vllm_service_host}:{config.vllm_service_port}/{path}{query}" - - async with get_http_client() as client: - vllm_response = await client.get(vllm_url) - - # Create streaming response - return StreamingResponse( - stream_response(vllm_response), - status_code=vllm_response.status_code, - headers=dict(vllm_response.headers), - ) - - except HTTPException: - raise - except Exception as e: - logger.error(f"Error processing request: {str(e)}", exc_info=True) - raise HTTPException(status_code=500, detail="Internal server error") - +app.include_router(routes.router) if __name__ == "__main__": import uvicorn diff --git a/k8s-autoscaler/k8s_autoscaler/types.py b/k8s-autoscaler/k8s_autoscaler/types.py new file mode 100644 index 0000000..49e001b --- /dev/null +++ b/k8s-autoscaler/k8s_autoscaler/types.py @@ -0,0 +1,24 @@ +from enum import Enum +from dataclasses import dataclass, field +import asyncio +from typing import Optional, ClassVar +import httpx +import time + + +class PodPhase(str, Enum): + PENDING = "Pending" + RUNNING = "Running" + SUCCEEDED = "Succeeded" + FAILED = "Failed" + UNKNOWN = "Unknown" + + +@dataclass +class AutoscalerState: + """Global state management for the autoscaler.""" + + last_activity: float = field(default_factory=time.time) + shutdown_task: Optional[asyncio.Task] = None + http_client: Optional[httpx.AsyncClient] = None + scaling_lock: ClassVar[asyncio.Lock] = asyncio.Lock() diff --git a/k8s-autoscaler/k8s_autoscaler/vllm.py b/k8s-autoscaler/k8s_autoscaler/vllm.py new file mode 100644 index 0000000..79b867f --- /dev/null +++ b/k8s-autoscaler/k8s_autoscaler/vllm.py @@ -0,0 +1,73 @@ +import asyncio +import logging +import time +from fastapi import BackgroundTasks +from .types import PodPhase, AutoscalerState +from .config import Settings +from .kubernetes import KubeCommand + +logger = logging.getLogger(__name__) + + +class VLLMManager: + """Manager for vLLM deployment operations.""" + + def __init__(self, settings: Settings, state: AutoscalerState, kube: KubeCommand): + self.settings = settings + self.state = state + self.kube = kube + + async def ensure_running(self) -> bool: + """Ensure vLLM is running, scaling up if necessary.""" + async with self.state.scaling_lock: + phase = await self.kube.get_pod_phase() + if phase == PodPhase.RUNNING: + return True + + logger.info(f"vLLM not running (phase: {phase}), scaling up") + if not await self.kube.scale_deployment(1): + return False + + return await self._wait_until_ready() + + async def _wait_until_ready(self) -> bool: + """Wait for vLLM to become ready.""" + start_time = time.time() + while time.time() - start_time < self.settings.activation_timeout: + phase = await self.kube.get_pod_phase() + if phase == PodPhase.RUNNING: + return True + if phase == PodPhase.FAILED: + logger.error("Pod failed to start") + return False + await asyncio.sleep(2) + logger.error("Timeout waiting for pod to become ready") + return False + + async def monitor_inactivity(self): + """Monitor for inactivity and scale down when timeout is reached.""" + try: + while True: + await asyncio.sleep(60) + current_replicas, _ = await self.kube.get_replicas() + if ( + time.time() - self.state.last_activity + > self.settings.inactivity_timeout + and current_replicas > 0 + ): + logger.info( + f"Scaling down vLLM after {self.settings.inactivity_timeout}s inactivity" + ) + await self.kube.scale_deployment(0) + break + except Exception as e: + logger.error(f"Error in inactivity 
monitor: {e}") + finally: + self.state.shutdown_task = None + + def reset_inactivity_timer(self, background_tasks: BackgroundTasks): + """Reset inactivity timer and ensure monitoring task is running.""" + self.state.last_activity = time.time() + if self.state.shutdown_task is None: + self.state.shutdown_task = asyncio.create_task(self.monitor_inactivity()) + background_tasks.add_task(lambda: self.state.shutdown_task) diff --git a/k8s-autoscaler/poetry.lock b/k8s-autoscaler/poetry.lock index b59ecf4..39e55c4 100644 --- a/k8s-autoscaler/poetry.lock +++ b/k8s-autoscaler/poetry.lock @@ -298,6 +298,40 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pydantic-settings" +version = "2.6.0" +description = "Settings management using Pydantic" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_settings-2.6.0-py3-none-any.whl", hash = "sha256:4a819166f119b74d7f8c765196b165f95cc7487ce58ea27dec8a5a26be0970e0"}, + {file = "pydantic_settings-2.6.0.tar.gz", hash = "sha256:44a1804abffac9e6a30372bb45f6cafab945ef5af25e66b1c634c01dd39e0188"}, +] + +[package.dependencies] +pydantic = ">=2.7.0" +python-dotenv = ">=0.21.0" + +[package.extras] +azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"] +toml = ["tomli (>=2.0.1)"] +yaml = ["pyyaml (>=6.0.1)"] + +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "sniffio" version = "1.3.1" @@ -359,4 +393,4 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "42286b323abdc5a2444aa3078fb3f01192955ab30de19fadbb8ad3f28f70a7b2" +content-hash = "01aa266e919a16630b7e43ec5df02dde3d50d1cae3b7c66bc6f26f56cbedf26b" diff --git a/k8s-autoscaler/pyproject.toml b/k8s-autoscaler/pyproject.toml index 021bfcf..6f4b149 100644 --- a/k8s-autoscaler/pyproject.toml +++ b/k8s-autoscaler/pyproject.toml @@ -10,6 +10,7 @@ python = "^3.10" fastapi = "^0.115.3" httpx = "^0.27.2" uvicorn = "^0.32.0" +pydantic-settings = "^2.6.0" [build-system]