diff --git a/.test.sh b/.test.sh index 4e25620..c38888a 100755 --- a/.test.sh +++ b/.test.sh @@ -2,5 +2,5 @@ set -euo pipefail python -m mypy ./src -python -m pyright . +python -m mypy ./tests python -m pytest -vv diff --git a/CHANGELOG.md b/CHANGELOG.md index bf4dcbf..bf5eca4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,19 @@ +## v0.5.0 (2024-10-07) + +### Feat + +- **core**: add environment and execution mode property to base controller +- **llm**: add llm base controller and aws controller +- **vllm**: saving endpoint base url in parameter store + +### Fix + +- **llm**: move the cloud agnostic methods of the controller to the base +- **core**: broken import after clean up __init__ files +- **llm, example**: pulumi only works with `__main__.py` file name +- **core**: init all controllers when using `from damavand...controllers` +- **vllm**: make api route open ai compatible + ## v0.4.1 (2024-10-03) ### Refactor diff --git a/README.md b/README.md index d96761a..e397c04 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,93 @@ -# damavand -## Introduction +# Damavand + +

+ +

+ +## What is Damavand? +Damavand is a comprehensive cloud-native application development framework designed to go beyond traditional Infrastructure as Code (IaC). It simplifies both application logic and cloud infrastructure management, providing developers with a unified, Pythonic approach to building, deploying, and scaling cloud-native applications. Damavand implements the ARC (Application, Resource, Controller) design pattern, ensuring that your cloud resources and application logic work seamlessly together without the complexity of deeply understanding cloud provider-specific details. + +With Damavand, your focus remains on writing business logic while the framework handles cloud architecture, leveraging Pulumi to generate cloud infrastructure code for multi-cloud environments. + +## Why Damavand? +Damavand is built for developers who want to focus on writing applications, not spending countless hours configuring and managing infrastructure. Here’s why Damavand stands out: + +- **Unified Application and Infrastructure:** Develop both cloud resources and business applications in a unified codebase with a clean, logical structure. +- **ARC Design Pattern:** Follow the proven Application, Resource, and Controller pattern to keep code organized, scalable, and maintainable. +- **Best Practices and Flexibility:** Offers optimized architecture designs while allowing developers to customize each part of the framework when needed. +- **Vendor Independence:** Support for multiple cloud providers, avoiding vendor lock-in and giving you the freedom to deploy anywhere. +- **Rapid Time-to-Market:** Dramatically shortens the time it takes to build and deploy cloud-native applications through pre-architected, cloud-agnostic templates and patterns. + +## How Damavand Works + +Damavand empowers developers to handle both the application layer and resource layer within one framework. 
By following the ARC design pattern, it decouples business logic from cloud complexities, enabling easy customization and scalability across different cloud providers. + +### Example + +> [!TIP] +> Check out the [examples](examples) directory for more examples. + +Here's an example using Damavand to create a Spark application on AWS (using AWS Glue as the compute infrastructure): + +```python +import os +from damavand.cloud.provider import AwsProvider +from damavand.factories import SparkControllerFactory + +from applications.orders import CustomerOrders +from applications.products import Products + + +def main() -> None: + spark_factory = SparkControllerFactory( + provider=AwsProvider( + app_name="my-app", + region="us-west-2", + ), + tags={"env": "dev"}, + ) + + spark_controller = spark_factory.new( + name="my-spark", + applications=[ + Products(), + CustomerOrders(), + ], + ) + + app_name = os.getenv("APP_NAME", "default_app") # Get app name at runtime + + spark_controller.provision() + spark_controller.run_application(app_name) + + +if __name__ == "__main__": + main() +``` + +## Key Features +- **ARC Design Pattern:** Implements the Application, Resource, and Controller layers to streamline the development process. +- **Pulumi-Powered IaC:** Uses Pulumi to manage cloud infrastructure resources in a cloud-agnostic way, reducing complexity. +- **Multi-Cloud Support:** Enables you to build applications that can run on AWS, Azure, and more, avoiding vendor lock-in. +- **Pythonic Flexibility:** Written natively in Python, Damavand allows you to easily modify and extend the framework to meet your application's needs. +- **No Extra Dependencies:** Requires only the Pulumi CLI for cloud infrastructure management—no unnecessary dependencies. + +## What is Damavand Useful For? + +Damavand is perfect for: + +- **Startups:** Accelerate the development and deployment of cloud-native applications. 
+- **Enterprises:** Ensure scalability, maintainability, and flexibility in cloud applications. +- **Developers:** Damavand allows you to focus on your expertise—whether it's backend development, data engineering, or another area—without worrying about the complexities of cloud architecture. For advanced users, it provides rich layers of customization to push the boundaries of optimizing solutions for specific cloud providers. + +## What Damavand is Not + +Damavand is not just an Infrastructure as Code (IaC) tool. It is not meant to be a full-fledged cloud platform, but rather a framework that integrates both application development and cloud infrastructure in a seamless, unified approach. + +## Supported Languages + +Damavand is developed in Python, with a focus on Python developers looking for a flexible, yet powerful framework for building cloud-native applications. + +## Getting Help + +For support, issues, or feature requests, please open an issue on the Damavand GitHub repository or contact us at support@datachef.co. We're here to help you build your next cloud-native application efficiently and effectively! 
diff --git a/devenv.lock b/devenv.lock index eb9d121..5e9b274 100644 --- a/devenv.lock +++ b/devenv.lock @@ -3,11 +3,11 @@ "devenv": { "locked": { "dir": "src/modules", - "lastModified": 1722262342, + "lastModified": 1727963652, "owner": "cachix", "repo": "devenv", - "rev": "11a1ca0ad80bc172d2efda34ae542494442dcf48", - "treeHash": "c1be883f8fad6adb0369cef0ac6e6c9bd7f3ec66", + "rev": "cb0052e25dbcc8267b3026160dc73cddaac7d5fd", + "treeHash": "4d81a7de8e23f71c47c22a2d33fe63787fc2d2e1", "type": "github" }, "original": { @@ -38,11 +38,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1710146030, + "lastModified": 1726560853, "owner": "numtide", "repo": "flake-utils", - "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", - "treeHash": "bd263f021e345cb4a39d80c126ab650bebc3c10c", + "rev": "c1dfcf08411b08f6b8615f7d8971a2bfa81d5e8a", + "treeHash": "0cbff24102dddab81e4c2940d7b356205c955fc6", "type": "github" }, "original": { @@ -95,11 +95,11 @@ ] }, "locked": { - "lastModified": 1720642556, + "lastModified": 1724996935, "owner": "nlewo", "repo": "nix2container", - "rev": "3853e5caf9ad24103b13aa6e0e8bcebb47649fe4", - "treeHash": "a9c2f1d3f52f288515ca0fb11f9aed970fd869b6", + "rev": "fa6bb0a1159f55d071ba99331355955ae30b3401", + "treeHash": "a934d246fadcf8b36d28f3577fad413f5ab3f7d3", "type": "github" }, "original": { @@ -126,11 +126,11 @@ }, "nixpkgs-stable": { "locked": { - "lastModified": 1722221733, + "lastModified": 1727907660, "owner": "NixOS", "repo": "nixpkgs", - "rev": "12bf09802d77264e441f48e25459c10c93eada2e", - "treeHash": "e959ebf2e25b21ec31266bef769b447e4b907916", + "rev": "5966581aa04be7eff830b9e1457d56dc70a0b798", + "treeHash": "55b5fb46cd5d19fe4690148056c5f013a899d746", "type": "github" }, "original": { @@ -150,11 +150,11 @@ "nixpkgs-stable": "nixpkgs-stable" }, "locked": { - "lastModified": 1721042469, + "lastModified": 1727854478, "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "f451c19376071a90d8c58ab1a953c6e9840527fd", - "treeHash": 
"91f40b7a3b9f6886bd77482cba5b5cd890415a2e", + "rev": "5f58871c9657b5fc0a7f65670fe2ba99c26c1d79", + "treeHash": "9bd8fa1bb0d757e1eb29a1eb0d8da485b57b1b31", "type": "github" }, "original": { diff --git a/devenv.nix b/devenv.nix index 3206765..e40e9ab 100644 --- a/devenv.nix +++ b/devenv.nix @@ -1,17 +1,9 @@ -{ - pkgs, - lib, - config, - inputs, - ... -}: +{ pkgs, lib, config, inputs, ... }: { name = "dmv"; # https://devenv.sh/basics/ - env = { - GREET = "🛠️ Let's hack "; - }; + env = { GREET = "🛠️ Let's hack "; }; # https://devenv.sh/scripts/ scripts.hello.exec = "echo $GREET"; @@ -34,13 +26,7 @@ }; # https://devenv.sh/packages/ - packages = with pkgs; [ - nixfmt-rfc-style - bat - jq - tealdeer - git - ]; + packages = with pkgs; [ nixfmt-rfc-style bat jq tealdeer git ]; languages = { # pyright requires npm @@ -84,6 +70,13 @@ yamllint = { enable = true; settings.preset = "relaxed"; + settings.configuration = '' + --- + extends: relaxed + + rules: + line-length: disable + ''; }; ruff.enable = true; diff --git a/docs/assets/damavand-logo-inverted.png b/docs/assets/damavand-logo-inverted.png new file mode 100644 index 0000000..07bfb1d Binary files /dev/null and b/docs/assets/damavand-logo-inverted.png differ diff --git a/docs/assets/damavand-logo.png b/docs/assets/damavand-logo.png new file mode 100644 index 0000000..163a440 Binary files /dev/null and b/docs/assets/damavand-logo.png differ diff --git a/docs/assets/damavand-logo.psd b/docs/assets/damavand-logo.psd new file mode 100644 index 0000000..767927d Binary files /dev/null and b/docs/assets/damavand-logo.psd differ diff --git a/docs/assets/damavand-logo.svg b/docs/assets/damavand-logo.svg new file mode 100644 index 0000000..5913bd1 --- /dev/null +++ b/docs/assets/damavand-logo.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/advance-aws-llm-with-dspy/Pulumi.yaml b/examples/advance-aws-llm-with-dspy/Pulumi.yaml new file mode 100644 index 0000000..b919d55 --- /dev/null +++ 
b/examples/advance-aws-llm-with-dspy/Pulumi.yaml @@ -0,0 +1,7 @@ +name: dspy_application +runtime: + name: python + options: + toolchain: pip + virtualenv: venv +description: A minimal Dspy application diff --git a/examples/advance-aws-llm-with-dspy/__main__.py b/examples/advance-aws-llm-with-dspy/__main__.py new file mode 100644 index 0000000..08db51f --- /dev/null +++ b/examples/advance-aws-llm-with-dspy/__main__.py @@ -0,0 +1,31 @@ +import os +from controller import AwsDspyController +from damavand.environment import Environment + +controller = AwsDspyController( + name="my-dspy", + region="eu-west-1", +) + + +def lambda_handler(event, context): + return controller.build_or_run( + app_id=event.get("app_id", "default"), + question=event.get("question", "What llm are you?"), + ) + + +if __name__ == "__main__": + if controller.environment == Environment.LOCAL: + # run the lambda handler locally + event = { + "app_id": os.environ.get("APP_ID", "default"), + "question": os.environ.get("QUESTION", "What llm are you?"), + } + context = {} + + if response := lambda_handler(event, context): + print(response) + else: + # aws automatically calls lambda_handler + pass diff --git a/examples/advance-aws-llm-with-dspy/applications.py b/examples/advance-aws-llm-with-dspy/applications.py new file mode 100644 index 0000000..4bdd7d1 --- /dev/null +++ b/examples/advance-aws-llm-with-dspy/applications.py @@ -0,0 +1,14 @@ +import json +import dspy + + +def default_question(connection: dspy.OpenAI, question: str) -> dict: + dspy.settings.configure(lm=connection) + predict = dspy.Predict("question -> answer") + answer = predict(question=question) + + return { + "statusCode": 200, + "headers": {"Content-Type": "application/json"}, + "body": json.dumps({"response": answer}), + } diff --git a/examples/advance-aws-llm-with-dspy/controller.py b/examples/advance-aws-llm-with-dspy/controller.py new file mode 100644 index 0000000..828977b --- /dev/null +++ 
b/examples/advance-aws-llm-with-dspy/controller.py @@ -0,0 +1,64 @@ +import dspy +from typing import Callable, Optional + +from damavand.base.controllers.base_controller import runtime +from damavand.cloud.aws.controllers.llm import AwsLlmController + +import applications + + +API_KEY = "EMPTY" + + +class AwsDspyController(AwsLlmController): + def __init__( + self, + name, + region: str, + model: Optional[str] = None, + tags: dict[str, str] = {}, + **kwargs, + ) -> None: + super().__init__(name, region, model, tags, **kwargs) + self.applications: dict[str, Callable] = { + "default": applications.default_question, + } + + @property + @runtime + def connection(self) -> dspy.OpenAI: + """Return the dspy OpenAI model.""" + + return dspy.OpenAI( + model=self.model_id, + api_base=f"{self.base_url}/", + api_key=API_KEY, + model_type="chat", + ) + + @runtime + def run_application(self, app_id: str, question: str, **kwargs) -> dict: + """Run the specified application.""" + + return self.applications[app_id](question, **kwargs) + + @runtime + def build_or_run(self, **kwargs) -> None | dict: + """ + Build or run the application based on the execution mode. + + Parameters + ---------- + kwargs + arguments to be passed to the application. Check the `run_application` method for more information. + + Returns + ------- + None | dict + If the execution mode is runtime, return the output of the application otherwise None. 
+ """ + + if self.is_runtime_execution: + self.run_application(**kwargs) + else: + self.provision() diff --git a/examples/advance-aws-llm-with-dspy/requirements.txt b/examples/advance-aws-llm-with-dspy/requirements.txt new file mode 100644 index 0000000..d353d33 --- /dev/null +++ b/examples/advance-aws-llm-with-dspy/requirements.txt @@ -0,0 +1,5 @@ +-e ../../../damavand +pulumi +boto3 +dspy-ai +sagemaker diff --git a/examples/aws-llm-with-openai-client/Pulumi.yaml b/examples/aws-llm-with-openai-client/Pulumi.yaml new file mode 100644 index 0000000..9655dda --- /dev/null +++ b/examples/aws-llm-with-openai-client/Pulumi.yaml @@ -0,0 +1,7 @@ +name: llm-openai-client +runtime: + name: python + options: + toolchain: pip + virtualenv: venv +description: A simple llm application that uses OpenAI's client. diff --git a/examples/aws-llm-with-openai-client/__main__.py b/examples/aws-llm-with-openai-client/__main__.py new file mode 100644 index 0000000..257a6b3 --- /dev/null +++ b/examples/aws-llm-with-openai-client/__main__.py @@ -0,0 +1,28 @@ +from damavand.cloud.aws.controllers.llm import AwsLlmController + + +controller = AwsLlmController( + name="my-dspy", + region="eu-west-1", +) + + +def lambda_handler(event, context): + question = event.get("question") + role = event.get("role", "user") + + response = controller.client.chat.completions.create( + model=controller.model_id, + messages=[{"role": role, "content": question}], + ) + + return { + "statusCode": 200, + "headers": {"Content-Type": "application/json"}, + "body": response, + } + + +if __name__ == "__main__": + if not controller.is_runtime_execution: + controller.provision() diff --git a/examples/aws-llm-with-openai-client/requirements.txt b/examples/aws-llm-with-openai-client/requirements.txt new file mode 100644 index 0000000..ef80e67 --- /dev/null +++ b/examples/aws-llm-with-openai-client/requirements.txt @@ -0,0 +1,5 @@ +-e ../../../damavand +pulumi +boto3 +sagemaker +openai diff --git 
a/examples/aws-llm-with-simple-requests/Pulumi.yaml b/examples/aws-llm-with-simple-requests/Pulumi.yaml new file mode 100644 index 0000000..dc40029 --- /dev/null +++ b/examples/aws-llm-with-simple-requests/Pulumi.yaml @@ -0,0 +1,7 @@ +name: simple-llm-application +runtime: + name: python + options: + toolchain: pip + virtualenv: venv +description: A minimal prompt engineering application that uses open source llm models diff --git a/examples/aws-llm-with-simple-requests/__main__.py b/examples/aws-llm-with-simple-requests/__main__.py new file mode 100644 index 0000000..50be635 --- /dev/null +++ b/examples/aws-llm-with-simple-requests/__main__.py @@ -0,0 +1,53 @@ +import requests +import os + +from damavand.cloud.aws.controllers.llm import AwsLlmController + + +controller = AwsLlmController( + name="my-dspy", + region="eu-west-1", +) + + +def ask(question: str, role: str) -> None: + headers = { + "Content-Type": "application/json", + } + + json_data = { + "messages": [ + { + "role": role, + "content": question, + }, + ], + "parameters": { + "max_new_tokens": 400, + }, + "stream": False, + } + + return requests.post( + controller.chat_completions_url, + headers=headers, + json=json_data, + ).json() + + +def lambda_handler(event, context): + question = event.get("question") + role = event.get("role", "user") + return ask(question, role) + + +if __name__ == "__main__": + if not controller.is_runtime_execution: + controller.provision() + else: + event = { + "question": os.environ.get("QUESTION", "What is the capital of France?"), + "role": os.environ.get("ROLE", "user"), + } + context = {} + print(lambda_handler(event, context)) diff --git a/examples/aws-llm-with-simple-requests/requirements.txt b/examples/aws-llm-with-simple-requests/requirements.txt new file mode 100644 index 0000000..e72d5be --- /dev/null +++ b/examples/aws-llm-with-simple-requests/requirements.txt @@ -0,0 +1,4 @@ +-e ../../../damavand +pulumi +boto3 +sagemaker diff --git a/examples/llm/Pulumi.yaml 
b/examples/llm-resources/Pulumi.yaml similarity index 100% rename from examples/llm/Pulumi.yaml rename to examples/llm-resources/Pulumi.yaml diff --git a/examples/llm/__main__.py b/examples/llm-resources/__main__.py similarity index 100% rename from examples/llm/__main__.py rename to examples/llm-resources/__main__.py diff --git a/examples/llm/requirements.txt b/examples/llm-resources/requirements.txt similarity index 100% rename from examples/llm/requirements.txt rename to examples/llm-resources/requirements.txt diff --git a/src/damavand/__init__.py b/src/damavand/__init__.py index 3d26edf..3d18726 100644 --- a/src/damavand/__init__.py +++ b/src/damavand/__init__.py @@ -1 +1 @@ -__version__ = "0.4.1" +__version__ = "0.5.0" diff --git a/src/damavand/base/controllers/__init__.py b/src/damavand/base/controllers/__init__.py index b8bb73f..9fe8c8b 100644 --- a/src/damavand/base/controllers/__init__.py +++ b/src/damavand/base/controllers/__init__.py @@ -1,11 +1,7 @@ from .base_controller import ApplicationController, runtime, buildtime -from .object_storage import ObjectStorageController -from .spark import SparkController __all__ = [ - "ApplicationController", - "ObjectStorageController", - "SparkController", "runtime", "buildtime", + "ApplicationController", ] diff --git a/src/damavand/base/controllers/base_controller.py b/src/damavand/base/controllers/base_controller.py index 1428825..40860d7 100644 --- a/src/damavand/base/controllers/base_controller.py +++ b/src/damavand/base/controllers/base_controller.py @@ -4,6 +4,7 @@ import pulumi from damavand import utils +from damavand.environment import Environment logger = logging.getLogger(__name__) @@ -60,6 +61,21 @@ def resource(self) -> PulumiResource: raise NotImplementedError() + @property + def environment(self) -> Environment: + """Return the environment that controller is being executed in.""" + + if env := self.extra_args.get("environment"): + return Environment(env) + else: + return Environment.from_system_env() 
+ + @property + def is_runtime_execution(self) -> bool: + """Return True if the execution mode is runtime.""" + + return not utils.is_building() + def provision(self) -> None: """Provision the resource in not provisioned yet.""" diff --git a/src/damavand/base/controllers/llm.py b/src/damavand/base/controllers/llm.py new file mode 100644 index 0000000..92be889 --- /dev/null +++ b/src/damavand/base/controllers/llm.py @@ -0,0 +1,97 @@ +from functools import cache +import logging +from typing import Optional + +from damavand.base.controllers import ApplicationController +from damavand.base.controllers.base_controller import runtime +from damavand.errors import RuntimeException + + +logger = logging.getLogger(__name__) + + +class LlmController(ApplicationController): + """ + Base class for LLM Controllers. This class provides the basic functionality for interacting with LLM APIs. The LLM APIs are following the OpenAI Chat Completions API model. For more information, see the [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat/create). + + LLM Controllers are using vLLM as backend for hardware optimization and serving open source models. For available list of models, see the [vLLM documentation](https://docs.vllm.ai/en/latest/models/supported_models.html). + + Parameters + ---------- + name : str + The name of the controller. + model : Optional[str] + The model name or ID. + tags : dict[str, str] + + Methods + ------- + model_id + Return the model name/ID. + base_url + Return the base URL for the LLM API. + default_api_key + Return the default API key. + chat_completions_url + Return the chat completions URL. + client + Return an OpenAI client as an standared interface for interacting with deployed LLM APIs. 
+ """ + + def __init__( + self, + name, + model: Optional[str] = None, + tags: dict[str, str] = {}, + **kwargs, + ) -> None: + ApplicationController.__init__(self, name, tags, **kwargs) + self._model_name = model + + @property + def model_id(self) -> str: + """Return the model name/ID.""" + + return self._model_name or "microsoft/Phi-3-mini-4k-instruct" + + @property + @runtime + @cache + def base_url(self) -> str: + """Return the base URL for the LLM API.""" + + raise NotImplementedError + + @property + @runtime + @cache + def default_api_key(self) -> str: + """Return the default API key.""" + + raise NotImplementedError + + @property + @runtime + @cache + def chat_completions_url(self) -> str: + """Return the chat completions URL.""" + + return f"{self.base_url}/chat/completions" + + @property + @runtime + @cache + def client(self) -> "openai.OpenAI": # type: ignore # noqa + """Return an OpenAI client as an standared interface for interacting with deployed LLM APIs.""" + + try: + import openai # type: ignore # noqa + except ImportError: + raise RuntimeException( + "Failed to import OpenAI library. Damavand provide this library as an optional dependency. Try to install it using `pip install damavand[openai]` or directly install it using pip or your dependency manager." 
+ ) + + return openai.OpenAI( + api_key=self.default_api_key, + base_url=f"{self.base_url}", + ) diff --git a/src/damavand/cloud/aws/controllers/__init__.py b/src/damavand/cloud/aws/controllers/__init__.py index cb65c17..e69de29 100644 --- a/src/damavand/cloud/aws/controllers/__init__.py +++ b/src/damavand/cloud/aws/controllers/__init__.py @@ -1,7 +0,0 @@ -from .object_storage import AwsObjectStorageController -from .spark import AwsSparkController - -__all__ = [ - "AwsObjectStorageController", - "AwsSparkController", -] diff --git a/src/damavand/cloud/aws/controllers/llm.py b/src/damavand/cloud/aws/controllers/llm.py new file mode 100644 index 0000000..cc08b2a --- /dev/null +++ b/src/damavand/cloud/aws/controllers/llm.py @@ -0,0 +1,114 @@ +import logging +from functools import cache +from typing import Optional + +import boto3 +from botocore.exceptions import ClientError +from pulumi import Resource as PulumiResource + +from damavand.base.controllers.llm import LlmController +from damavand.base.controllers.base_controller import runtime, buildtime +from damavand.cloud.aws.resources import AwsVllmComponent, AwsVllmComponentArgs +from damavand.errors import RuntimeException + + +logger = logging.getLogger(__name__) + + +class AwsLlmController(LlmController): + """ + AWS implementation of the LLM Controller. You can check LlmController for more information. + + Parameters + ---------- + name : str + The name of the controller. + region : str + The AWS region. + model : Optional[str] + The model name or ID. + tags : dict[str, str] + + Methods + ------- + base_url + Return the base URL for the LLM API. + default_api_key + Return the default API key. + resource + Return the Pulumi IaC AwsVllmComponent object. 
+ """ + + def __init__( + self, + name, + region: str, + model: Optional[str] = None, + tags: dict[str, str] = {}, + **kwargs, + ) -> None: + super().__init__(name, model, tags, **kwargs) + self._parameter_store = boto3.client("ssm") + self._region = region + + @property + def _base_url_ssm_name(self) -> str: + """Return the SSM parameter name for the base url.""" + + return f"/damavand/{self.name}/endpoint/url" + + @property + @runtime + @cache + def base_url(self) -> str: + """ + Retrieve the base URL from the SSM parameter store. + + Returns + ------- + str + The base URL. + + Raises + ------ + RuntimeException + If the base URL cannot be retrieved from AWS. + + """ + + try: + response = self._parameter_store.get_parameter( + Name=self._base_url_ssm_name, + ) + + return response["Parameter"]["Value"] + except ClientError as e: + raise RuntimeException( + f"Failed to retrieve endpoint URL from SSM parameter store: {e}" + ) + except KeyError as e: + raise RuntimeException( + f"Failed to retrieve endpoint URL from SSM parameter store: {e}" + ) + + @property + @runtime + @cache + def default_api_key(self) -> str: + """Return the default API key.""" + + return "EMPTY" + + @buildtime + @cache + def resource(self) -> PulumiResource: + """Return the Pulumi IaC AwsVllmComponent object.""" + + return AwsVllmComponent( + name=self.name, + args=AwsVllmComponentArgs( + region=self._region, + public_internet_access=True, + endpoint_ssm_parameter_name=self._base_url_ssm_name, + ), + ) diff --git a/src/damavand/cloud/aws/controllers/object_storage.py b/src/damavand/cloud/aws/controllers/object_storage.py index f6ed9df..5c355e6 100644 --- a/src/damavand/cloud/aws/controllers/object_storage.py +++ b/src/damavand/cloud/aws/controllers/object_storage.py @@ -8,7 +8,8 @@ from pulumi import Resource as PulumiResource from damavand import utils -from damavand.base.controllers import ObjectStorageController, buildtime, runtime +from damavand.base.controllers import buildtime, runtime 
+from damavand.base.controllers.object_storage import ObjectStorageController from damavand.errors import ( RuntimeException, ObjectNotFound, diff --git a/src/damavand/cloud/aws/controllers/spark.py b/src/damavand/cloud/aws/controllers/spark.py index 1f4713d..80cad7a 100644 --- a/src/damavand/cloud/aws/controllers/spark.py +++ b/src/damavand/cloud/aws/controllers/spark.py @@ -5,7 +5,8 @@ from pulumi import Resource as PulumiResource from sparkle.application import Sparkle -from damavand.base.controllers import SparkController, buildtime +from damavand.base.controllers import buildtime +from damavand.base.controllers.spark import SparkController from damavand.cloud.aws.resources import GlueComponent, GlueComponentArgs from damavand.cloud.aws.resources.glue_component import GlueJobDefinition from damavand.errors import BuildtimeException diff --git a/src/damavand/cloud/aws/resources/vllm_component.py b/src/damavand/cloud/aws/resources/vllm_component.py index c4e41e2..f763fb6 100644 --- a/src/damavand/cloud/aws/resources/vllm_component.py +++ b/src/damavand/cloud/aws/resources/vllm_component.py @@ -33,6 +33,8 @@ class AwsVllmComponentArgs: whether to deploy a public API for the model. api_env_name : str the name of the API environment. + endpoint_ssm_parameter_name : str + the name of the SSM parameter to store the endpoint URL. """ region: str = "us-west-2" @@ -42,6 +44,7 @@ class AwsVllmComponentArgs: instance_type: str = "ml.g4dn.xlarge" public_internet_access: bool = False api_env_name: str = "prod" + endpoint_ssm_parameter_name: str = "/Vllm/endpoint/url" class AwsVllmComponent(PulumiComponentResource): @@ -61,11 +64,11 @@ class AwsVllmComponent(PulumiComponentResource): Methods ------- - assume_policy() - Return the assume role policy for SageMaker. - managed_policy_arns() + get_service_assume_policy(service) + Return the assume role policy for the requested service. 
+ sagemaker_access_policies() Return a list of managed policy ARNs that defines the permissions for Sagemaker. - role() + sagemaker_execution_role() Return an execution role for SageMaker. model_image_ecr_path() Return the ECR image path for the djl-lmi container image serving vllm. @@ -77,6 +80,34 @@ class AwsVllmComponent(PulumiComponentResource): Return a SageMaker endpoint configuration for the vllm model. endpoint() Return a SageMaker endpoint for the vllm model. + api() + Return a public APIGateway RESTAPI for the SageMaker endpoint. + api_resource_v1() + Return a resource for API version routing. + api_resource_chat() + Return a resource for chat routing. + api_resource_completions() + Return a resource for completions routing. + api_method() + Return openai chat completions compatible method. + api_sagemaker_integration_uri() + Return the SageMaker model integration URI for the API Gateway. + apigateway_access_policies() + Return a list of managed policy ARNs that defines the permissions for APIGateway. + api_access_sagemaker_role() + Return an execution role for APIGateway to access SageMaker endpoints. + api_integration() + Return a sagemaker integration for the API Gateway. + api_integration_response() + Return a sagemaker integration response for the API Gateway. + api_method_response() + Return a sagemaker method response for the API Gateway. + api_deployment() + Return an API deployment for the API Gateway. + endpoint_base_url() + Return the base URL for the deployed endpoint. + endpoint_ssm_parameter() + Return an SSM parameter that stores the deployed endpoint URL. """ def __init__( @@ -256,18 +287,18 @@ def api(self) -> aws.apigateway.RestApi: @property @cache - def api_resource(self) -> aws.apigateway.Resource: + def api_resource_v1(self) -> aws.apigateway.Resource: """ Return a resource for the API Gateway. 
""" return aws.apigateway.Resource( - resource_name=f"{self._name}-api-resource", + resource_name=f"{self._name}-api-resource-v1", opts=ResourceOptions(parent=self), rest_api=self.api.id, parent_id=self.api.root_resource_id, - path_part="chat/completions", + path_part="v1", ) @property @@ -459,7 +490,7 @@ def api_integration(self) -> aws.apigateway.Integration: resource_name=f"{self._name}-api-integration", opts=ResourceOptions(parent=self), rest_api=self.api.id, - resource_id=self.api_resource.id, + resource_id=self.api_resource_completions.id, http_method=self.api_method.http_method, integration_http_method="POST", type="AWS", @@ -479,7 +510,7 @@ def api_integration_response(self) -> aws.apigateway.IntegrationResponse: resource_name=f"{self._name}-api-integration-response", opts=ResourceOptions(parent=self, depends_on=[self.api_integration]), rest_api=self.api.id, - resource_id=self.api_resource.id, + resource_id=self.api_resource_completions.id, http_method=self.api_method.http_method, status_code="200", ) @@ -503,7 +534,7 @@ def api_method_response(self) -> aws.apigateway.MethodResponse: @property @cache - def api_deploy(self) -> aws.apigateway.Deployment: + def api_deployment(self) -> aws.apigateway.Deployment: """ Return an API deployment for the API Gateway. @@ -522,3 +553,52 @@ def api_deploy(self) -> aws.apigateway.Deployment: rest_api=self.api.id, stage_name=self.args.api_env_name, ) + + @property + def endpoint_base_url(self) -> pulumi.Output[str]: + """ + Return the base URL for the deployed endpoint. + + Raises + ------ + AttributeError + When public_internet_access is False. 
+ """ + + if not self.args.public_internet_access: + raise AttributeError( + "`endpoint_base_url` is only available when public_internet_access is True" + ) + + return pulumi.Output.all( + self.api_deployment.invoke_url, self.api_resource_v1.path_part + ).apply(lambda args: f"{args[0]}/{args[1]}") + + @property + @cache + def endpoint_ssm_parameter(self) -> aws.ssm.Parameter: + """ + Return an SSM parameter that stores the deployed endpoint URL. + + Raises + ------ + AttributeError + When public_internet_access is False. + """ + + if not self.args.public_internet_access: + raise AttributeError( + "`endpoint_ssm_parameter`is only available when public_internet_access is True" + ) + + return aws.ssm.Parameter( + resource_name=f"{self._name}-endpoint-ssm-parameter", + opts=ResourceOptions(parent=self), + name=( + self.args.endpoint_ssm_parameter_name + if self.args.public_internet_access + else self.endpoint.endpoint_config_name + ), + type=aws.ssm.ParameterType.STRING, + value=self.endpoint_base_url, + ) diff --git a/src/damavand/cloud/azure/controllers/spark.py b/src/damavand/cloud/azure/controllers/spark.py index c13435d..85d2ee8 100644 --- a/src/damavand/cloud/azure/controllers/spark.py +++ b/src/damavand/cloud/azure/controllers/spark.py @@ -6,7 +6,8 @@ from sparkle.application import Sparkle -from damavand.base.controllers import SparkController, buildtime +from damavand.base.controllers import buildtime +from damavand.base.controllers.spark import SparkController from damavand.cloud.azure.resources import SynapseComponent, SynapseComponentArgs from damavand.cloud.azure.resources.synapse_component import SynapseJobDefinition diff --git a/src/damavand/factories.py b/src/damavand/factories.py index 0367a2a..367e7e3 100644 --- a/src/damavand/factories.py +++ b/src/damavand/factories.py @@ -1,7 +1,7 @@ from sparkle.application import Sparkle from damavand.base.controllers.spark import SparkController from damavand.base.factory import ApplicationControllerFactory 
-from damavand.cloud.aws.controllers import AwsSparkController +from damavand.cloud.aws.controllers.spark import AwsSparkController from damavand.cloud.azure.controllers import AzureSparkController diff --git a/tests/base/test_spark.py b/tests/base/test_spark.py index 6fcb75f..cedd5c7 100644 --- a/tests/base/test_spark.py +++ b/tests/base/test_spark.py @@ -1,6 +1,6 @@ import pytest -from damavand.base.controllers import SparkController +from damavand.base.controllers.spark import SparkController @pytest.fixture diff --git a/tests/clouds/aws/controllers/test_aws_object_storage.py b/tests/clouds/aws/controllers/test_aws_object_storage.py index d6a43d8..33cab3b 100644 --- a/tests/clouds/aws/controllers/test_aws_object_storage.py +++ b/tests/clouds/aws/controllers/test_aws_object_storage.py @@ -4,7 +4,7 @@ from moto import mock_aws from pulumi_aws import s3 -from damavand.cloud.aws.controllers import AwsObjectStorageController +from damavand.cloud.aws.controllers.object_storage import AwsObjectStorageController from damavand.errors import ObjectNotFound diff --git a/tests/clouds/aws/resources/test_vllm_component.py b/tests/clouds/aws/resources/test_vllm_component.py index 3fbc420..681e266 100644 --- a/tests/clouds/aws/resources/test_vllm_component.py +++ b/tests/clouds/aws/resources/test_vllm_component.py @@ -57,13 +57,13 @@ def test_public_internet_access(): ) assert isinstance(vllm.api, aws.apigateway.RestApi) - assert isinstance(vllm.api_resource, aws.apigateway.Resource) + assert isinstance(vllm.api_resource_completions, aws.apigateway.Resource) assert isinstance(vllm.api_method, aws.apigateway.Method) assert isinstance(vllm.api_access_sagemaker_role, aws.iam.Role) assert isinstance(vllm.api_integration, aws.apigateway.Integration) assert isinstance(vllm.api_integration_response, aws.apigateway.IntegrationResponse) assert isinstance(vllm.api_method_response, aws.apigateway.MethodResponse) - assert isinstance(vllm.api_deploy, aws.apigateway.Deployment) + assert 
isinstance(vllm.api_deployment, aws.apigateway.Deployment) with pytest.raises(AttributeError): vllm.admin_api_key