From d2a8028b3b992d1765bb32dad8d93931892df47c Mon Sep 17 00:00:00 2001
From: pm3310
Date: Sat, 17 Feb 2024 12:59:36 +0000
Subject: [PATCH] LLM Platform
---
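Example invocations of the new commands (illustrative only; command names assume Click's
default underscore-to-dash naming for the functions added below):

    sagify llm platforms
    sagify llm sagemaker-models --chat-completions --embeddings
    sagify llm start-local-gateway
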
sagify/commands/llm.py | 255 +++++++++++++++++++++++++++++++++++--
sagify/llm_gateway/main.py | 4 +-
2 files changed, 246 insertions(+), 13 deletions(-)
diff --git a/sagify/commands/llm.py b/sagify/commands/llm.py
index 34cb237..9016cee 100644
--- a/sagify/commands/llm.py
+++ b/sagify/commands/llm.py
@@ -15,6 +15,94 @@
click.disable_unicode_literals_warning = True
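+# Maps a user-facing chat model id to a (SageMaker JumpStart model id, Hugging Face model page) pair.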
+_MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME = {
+ 'llama-2-7b': ('meta-textgeneration-llama-2-7b-f', 'https://huggingface.co/meta-llama/Llama-2-7b'),
+ 'llama-2-13b': ('meta-textgeneration-llama-2-13b-f', 'https://huggingface.co/meta-llama/Llama-2-13b'),
+ 'llama-2-70b': ('meta-textgeneration-llama-2-70b-f', 'https://huggingface.co/meta-llama/Llama-2-70b'),
+}
+
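+# Instance types supported by each JumpStart chat model, each paired with an instance reference link.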
+_VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL = {
+ 'meta-textgeneration-llama-2-7b-f': [
+ ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+ ('ml.g5.4xlarge', 'https://instances.vantage.sh/aws/ec2/g5.4xlarge'),
+ ('ml.g5.12xlarge', 'https://instances.vantage.sh/aws/ec2/g5.12xlarge'),
+ ('ml.g5.24xlarge', 'https://instances.vantage.sh/aws/ec2/g5.24xlarge'),
+ ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+ ('ml.p3dn.24xlarge', 'https://instances.vantage.sh/aws/ec2/p3dn.24xlarge'),
+ ],
+ 'meta-textgeneration-llama-2-13b-f': [
+ ('ml.g5.12xlarge', 'https://instances.vantage.sh/aws/ec2/g5.12xlarge'),
+ ('ml.g5.24xlarge', 'https://instances.vantage.sh/aws/ec2/g5.24xlarge'),
+ ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+ ],
+ 'meta-textgeneration-llama-2-70b-f': [
+ ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+ ],
+}
+
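+# Text-to-image (image creations) models follow the same (JumpStart model id, Hugging Face model page) structure.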
+_MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME = {
+ 'stabilityai-stable-diffusion-v2': (
+ 'model-txt2img-stabilityai-stable-diffusion-v2',
+ 'https://huggingface.co/stabilityai/stable-diffusion-2'
+ ),
+ 'stabilityai-stable-diffusion-v2-1-base': (
+ 'model-txt2img-stabilityai-stable-diffusion-v2-1-base',
+ 'https://huggingface.co/stabilityai/stable-diffusion-2-1-base'
+ ),
+ 'stabilityai-stable-diffusion-v2-fp16': (
+ 'model-txt2img-stabilityai-stable-diffusion-v2-fp16',
+ 'https://huggingface.co/stabilityai/stable-diffusion-2/tree/fp16'
+ )
+}
+
+_VALID_INSTANCE_TYPES_PER_IMAGE_CREATIONS_MODEL = {
+ 'model-txt2img-stabilityai-stable-diffusion-v2': [
+ ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+ ('ml.g4dn.2xlarge', 'https://instances.vantage.sh/aws/ec2/g4dn.2xlarge'),
+ ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+ ],
+ 'model-txt2img-stabilityai-stable-diffusion-v2-1-base': [
+ ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+ ('ml.g4dn.2xlarge', 'https://instances.vantage.sh/aws/ec2/g4dn.2xlarge'),
+ ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+ ],
+ 'model-txt2img-stabilityai-stable-diffusion-v2-fp16': [
+ ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+ ('ml.g4dn.2xlarge', 'https://instances.vantage.sh/aws/ec2/g4dn.2xlarge'),
+ ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+ ],
+}
+
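+# Embeddings models follow the same structure.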
+_MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME = {
+ 'bge-large-en': ('huggingface-sentencesimilarity-bge-large-en', 'https://huggingface.co/BAAI/bge-large-en'),
+ 'bge-base-en': ('huggingface-sentencesimilarity-bge-base-en', 'https://huggingface.co/BAAI/bge-base-en'),
+ 'gte-large': ('huggingface-sentencesimilarity-gte-large', 'https://huggingface.co/thenlper/gte-large'),
+    'gte-base': ('huggingface-sentencesimilarity-gte-base', 'https://huggingface.co/thenlper/gte-base'),
+ 'e5-large-v2': ('huggingface-sentencesimilarity-e5-large-v2', 'https://huggingface.co/intfloat/e5-large-v2'),
+ 'bge-small-en': ('huggingface-sentencesimilarity-bge-small-en', 'https://huggingface.co/BAAI/bge-small-en'),
+ 'e5-base-v2': ('huggingface-sentencesimilarity-e5-base-v2', 'https://huggingface.co/intfloat/e5-base-v2'),
+ 'multilingual-e5-large': ('huggingface-sentencesimilarity-multilingual-e5-large', 'https://huggingface.co/intfloat/multilingual-e5-large'),
+ 'e5-large': ('huggingface-sentencesimilarity-e5-large', 'https://huggingface.co/intfloat/e5-large'),
+ 'gte-small': ('huggingface-sentencesimilarity-gte-small', 'https://huggingface.co/thenlper/gte-small'),
+ 'e5-base': ('huggingface-sentencesimilarity-e5-base', 'https://huggingface.co/intfloat/e5-base'),
+ 'e5-small-v2': ('huggingface-sentencesimilarity-e5-small-v2', 'https://huggingface.co/intfloat/e5-small-v2'),
+ 'multilingual-e5-base': ('huggingface-sentencesimilarity-multilingual-e5-base', 'https://huggingface.co/intfloat/multilingual-e5-base'),
+ 'all-MiniLM-L6-v2': ('huggingface-sentencesimilarity-all-MiniLM-L6-v2', 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2'),
+}
+
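+# All embeddings models share this single pool of supported instance types.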
+_VALID_EMBEDDINGS_INSTANCE_TYPES = [
+ ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+ ('ml.g5.4xlarge', 'https://instances.vantage.sh/aws/ec2/g5.4xlarge'),
+ ('ml.g5.12xlarge', 'https://instances.vantage.sh/aws/ec2/g5.12xlarge'),
+ ('ml.g5.24xlarge', 'https://instances.vantage.sh/aws/ec2/g5.24xlarge'),
+ ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+ ('ml.p3dn.24xlarge', 'https://instances.vantage.sh/aws/ec2/p3dn.24xlarge'),
+ ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+ ('ml.p3.8xlarge', 'https://instances.vantage.sh/aws/ec2/p3.8xlarge'),
+ ('ml.p3.16xlarge', 'https://instances.vantage.sh/aws/ec2/p3.16xlarge'),
+]
+
+
@click.group()
def llm():
"""
@@ -22,6 +110,91 @@ def llm():
"""
pass
+@llm.command()
+def platforms():
+ """
+    Command to list the supported LLM platforms
+ """
+ logger.info("Platforms:")
+ logger.info(" - OpenAI: https://platform.openai.com/docs/overview")
+ logger.info(" - AWS Sagemaker: https://aws.amazon.com/sagemaker")
+
+@llm.command()
+@click.option(
+ '--all',
+ is_flag=True,
+ show_default=True,
+ default=False,
+ help='Show all LLM models.'
+)
+@click.option(
+ '--chat-completions',
+ is_flag=True,
+ show_default=True,
+ default=False,
+ help='Show chat completions models.'
+)
+@click.option(
+ '--image-creations',
+ is_flag=True,
+ show_default=True,
+ default=False,
+ help='Show image creations models.'
+)
+@click.option(
+ '--embeddings',
+ is_flag=True,
+ show_default=True,
+ default=False,
+ help='Show embeddings models.'
+)
+def sagemaker_models(all, chat_completions, image_creations, embeddings):
+ """
+    Command to list the LLM models available on AWS SageMaker
+ """
+ logger.info(ASCII_LOGO)
+
+ if not any([all, chat_completions, image_creations, embeddings]):
+        logger.error("At least one of --all, --chat-completions, --image-creations or --embeddings must be provided.")
+ sys.exit(-1)
+
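+    # --all is shorthand for enabling all three model categories.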
+ if all:
+ chat_completions, image_creations, embeddings = True, True, True
+
+ logger.info("Available LLM models:\n")
+
+ if chat_completions:
+ logger.info("Chat Completions:")
+ for model_id, (model_name, model_url) in _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME.items():
+ logger.info(" - Model: {}".format(model_id))
+ logger.info(" Model URL: {}".format(model_url))
+ logger.info(" Instance Types:")
+ for instance_type, instance_url in _VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL[model_name]:
+ logger.info(" - Instance Type: {}".format(instance_type))
+ logger.info(" Instance URL: {}".format(instance_url))
+ logger.info("\n")
+
+ if image_creations:
+ logger.info("Image Creations:")
+ for model_id, (model_name, model_url) in _MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME.items():
+ logger.info(" - Model: {}".format(model_id))
+ logger.info(" Model URL: {}".format(model_url))
+ logger.info(" Instance Types:")
+ for instance_type, instance_url in _VALID_INSTANCE_TYPES_PER_IMAGE_CREATIONS_MODEL[model_name]:
+ logger.info(" - Instance Type: {}".format(instance_type))
+ logger.info(" Instance URL: {}".format(instance_url))
+ logger.info("\n")
+
+ if embeddings:
+        logger.info("Embeddings:")
+ for model_id, (model_name, model_url) in _MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME.items():
+ logger.info(" - Model: {}".format(model_id))
+ logger.info(" Model URL: {}".format(model_url))
+ logger.info(" Instance Types:")
+ for instance_type, instance_url in _VALID_EMBEDDINGS_INSTANCE_TYPES:
+ logger.info(" - Instance Type: {}".format(instance_type))
+ logger.info(" Instance URL: {}".format(instance_url))
+
@llm.command()
@click.option(
@@ -104,20 +277,17 @@ def start(
# Default configuration
default_config = {
'chat_completions': {
- 'model': 'meta-textgeneration-llama-2-7b-f',
- 'model_version': '1.*',
+ 'model': 'llama-2-7b',
'instance_type': 'ml.g5.2xlarge',
'num_instances': 1,
},
'image_creations': {
- 'model': 'model-txt2img-stabilityai-stable-diffusion-v2-1-base',
- 'model_version': '1.*',
+ 'model': 'stabilityai-stable-diffusion-v2-1-base',
'instance_type': 'ml.p3.2xlarge',
'num_instances': 1,
},
'embeddings': {
- 'model': 'huggingface-sentencesimilarity-gte-small',
- 'model_version': '1.*',
+ 'model': 'gte-small',
'instance_type': 'ml.g5.2xlarge',
'num_instances': 1,
},
@@ -139,9 +309,27 @@ def start(
}
if chat_completions:
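+        # Validate the configured model id and instance type before deploying; the same checks repeat for image creations and embeddings below.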
+ if default_config['chat_completions']['model'] not in _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME:
+ raise ValueError(
+ "Invalid chat completions model id. Available model ids: {}".format(
+ list(_MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME.keys())
+ )
+ )
+
+        valid_chat_instance_types = [
+            ec2_type for ec2_type, _ in _VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL[
+                _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME[default_config['chat_completions']['model']][0]
+            ]
+        ]
+        if default_config['chat_completions']['instance_type'] not in valid_chat_instance_types:
+            raise ValueError(
+                "Invalid instance type for chat completions model. "
+                "Available instance types: {}".format(valid_chat_instance_types)
+            )
+
chat_endpoint_name, _ = api_cloud.foundation_model_deploy(
- model_id=default_config['chat_completions']['model'],
- model_version=default_config['chat_completions']['model_version'],
+ model_id=_MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME[default_config['chat_completions']['model']][0],
+ model_version='1.*',
num_instances=default_config['chat_completions']['num_instances'],
ec2_type=default_config['chat_completions']['instance_type'],
aws_region=aws_region,
@@ -155,9 +343,27 @@ def start(
logger.info("Chat Completions Endpoint Name: {}".format(chat_endpoint_name))
if image_creations:
+ if default_config['image_creations']['model'] not in _MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME:
+ raise ValueError(
+ "Invalid image creations model id. Available model ids: {}".format(
+ list(_MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME.keys())
+ )
+ )
+
+        valid_image_instance_types = [
+            ec2_type for ec2_type, _ in _VALID_INSTANCE_TYPES_PER_IMAGE_CREATIONS_MODEL[
+                _MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME[default_config['image_creations']['model']][0]
+            ]
+        ]
+        if default_config['image_creations']['instance_type'] not in valid_image_instance_types:
+            raise ValueError(
+                "Invalid instance type for image creations model. "
+                "Available instance types: {}".format(valid_image_instance_types)
+            )
+
image_endpoint_name, _ = api_cloud.foundation_model_deploy(
- model_id=default_config['image_creations']['model'],
- model_version=default_config['image_creations']['model_version'],
+ model_id=_MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME[default_config['image_creations']['model']][0],
+ model_version='1.*',
num_instances=default_config['image_creations']['num_instances'],
ec2_type=default_config['image_creations']['instance_type'],
aws_region=aws_region,
@@ -171,9 +377,23 @@ def start(
logger.info("Image Creations Endpoint Name: {}".format(image_endpoint_name))
if embeddings:
+ if default_config['embeddings']['model'] not in _MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME:
+ raise ValueError(
+ "Invalid embeddings model id. Available model ids: {}".format(
+ list(_MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME.keys())
+ )
+ )
+
+        valid_embeddings_instance_types = [ec2_type for ec2_type, _ in _VALID_EMBEDDINGS_INSTANCE_TYPES]
+        if default_config['embeddings']['instance_type'] not in valid_embeddings_instance_types:
+            raise ValueError(
+                "Invalid instance type for embeddings model. "
+                "Available instance types: {}".format(valid_embeddings_instance_types)
+            )
+
embeddings_endpoint_name, _ = api_cloud.foundation_model_deploy(
- model_id=default_config['embeddings']['model'],
- model_version=default_config['embeddings']['model_version'],
+ model_id=_MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME[default_config['embeddings']['model']][0],
+ model_version='1.*',
num_instances=default_config['embeddings']['num_instances'],
ec2_type=default_config['embeddings']['instance_type'],
aws_region=aws_region,
@@ -289,6 +509,17 @@ def stop(
logger.info("{}".format(e))
sys.exit(-1)
+@llm.command()
+def start_local_gateway():
+ """
+    Command to start the LLM gateway locally
+ """
+ logger.info(ASCII_LOGO)
+ logger.info("Starting local gateway...\n")
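+    # Imported lazily so the gateway's web-server dependencies are only loaded when this command runs.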
+ from sagify.llm_gateway.main import start_server
+ start_server()
+llm.add_command(platforms)
+llm.add_command(sagemaker_models)
llm.add_command(start)
llm.add_command(stop)
diff --git a/sagify/llm_gateway/main.py b/sagify/llm_gateway/main.py
index 31d4d96..19f993b 100644
--- a/sagify/llm_gateway/main.py
+++ b/sagify/llm_gateway/main.py
@@ -14,6 +14,8 @@
app.include_router(api_router)
app.add_exception_handler(InternalServerError, internal_server_error_handler)
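+# Also called by the sagify CLI (sagify/commands/llm.py) to run the gateway locally on 0.0.0.0:8080.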
+def start_server():
+ uvicorn.run("sagify.llm_gateway.main:app", port=8080, host="0.0.0.0")
if __name__ == "__main__":
- uvicorn.run("sagify.llm_gateway.main:app", port=8080, host="0.0.0.0")
+ start_server()