From d2a8028b3b992d1765bb32dad8d93931892df47c Mon Sep 17 00:00:00 2001
From: pm3310
Date: Sat, 17 Feb 2024 12:59:36 +0000
Subject: [PATCH] LLM Platform
---
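Example invocations of the new commands (illustrative only; command names assume Click's
default underscore-to-dash naming for the functions added below):

    sagify llm platforms
    sagify llm sagemaker-models --chat-completions --embeddings
    sagify llm start-local-gateway
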
sagify/commands/llm.py | 255 +++++++++++++++++++++++++++++++++++--
sagify/llm_gateway/main.py | 4 +-
2 files changed, 246 insertions(+), 13 deletions(-)
diff --git a/sagify/commands/llm.py b/sagify/commands/llm.py
index 34cb237..9016cee 100644
--- a/sagify/commands/llm.py
+++ b/sagify/commands/llm.py
@@ -15,6 +15,94 @@
click.disable_unicode_literals_warning = True
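+# Maps a user-facing chat model id to a (SageMaker JumpStart model id, Hugging Face model page) pair.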
+_MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME = {
+ 'llama-2-7b': ('meta-textgeneration-llama-2-7b-f', 'https://huggingface.co/meta-llama/Llama-2-7b'),
+ 'llama-2-13b': ('meta-textgeneration-llama-2-13b-f', 'https://huggingface.co/meta-llama/Llama-2-13b'),
+ 'llama-2-70b': ('meta-textgeneration-llama-2-70b-f', 'https://huggingface.co/meta-llama/Llama-2-70b'),
+}
+
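+# Instance types supported by each JumpStart chat model, each paired with an instance reference link.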
+_VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL = {
+ 'meta-textgeneration-llama-2-7b-f': [
+ ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+ ('ml.g5.4xlarge', 'https://instances.vantage.sh/aws/ec2/g5.4xlarge'),
+ ('ml.g5.12xlarge', 'https://instances.vantage.sh/aws/ec2/g5.12xlarge'),
+ ('ml.g5.24xlarge', 'https://instances.vantage.sh/aws/ec2/g5.24xlarge'),
+ ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+ ('ml.p3dn.24xlarge', 'https://instances.vantage.sh/aws/ec2/p3dn.24xlarge'),
+ ],
+ 'meta-textgeneration-llama-2-13b-f': [
+ ('ml.g5.12xlarge', 'https://instances.vantage.sh/aws/ec2/g5.12xlarge'),
+ ('ml.g5.24xlarge', 'https://instances.vantage.sh/aws/ec2/g5.24xlarge'),
+ ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+ ],
+ 'meta-textgeneration-llama-2-70b-f': [
+ ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+ ],
+}
+
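+# Text-to-image (image creations) models follow the same (JumpStart model id, Hugging Face model page) structure.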
+_MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME = {
+ 'stabilityai-stable-diffusion-v2': (
+ 'model-txt2img-stabilityai-stable-diffusion-v2',
+ 'https://huggingface.co/stabilityai/stable-diffusion-2'
+ ),
+ 'stabilityai-stable-diffusion-v2-1-base': (
+ 'model-txt2img-stabilityai-stable-diffusion-v2-1-base',
+ 'https://huggingface.co/stabilityai/stable-diffusion-2-1-base'
+ ),
+ 'stabilityai-stable-diffusion-v2-fp16': (
+ 'model-txt2img-stabilityai-stable-diffusion-v2-fp16',
+ 'https://huggingface.co/stabilityai/stable-diffusion-2/tree/fp16'
+ )
+}
+
+_VALID_INSTANCE_TYPES_PER_IMAGE_CREATIONS_MODEL = {
+ 'model-txt2img-stabilityai-stable-diffusion-v2': [
+ ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+ ('ml.g4dn.2xlarge', 'https://instances.vantage.sh/aws/ec2/g4dn.2xlarge'),
+ ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+ ],
+ 'model-txt2img-stabilityai-stable-diffusion-v2-1-base': [
+ ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+ ('ml.g4dn.2xlarge', 'https://instances.vantage.sh/aws/ec2/g4dn.2xlarge'),
+ ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+ ],
+ 'model-txt2img-stabilityai-stable-diffusion-v2-fp16': [
+ ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+ ('ml.g4dn.2xlarge', 'https://instances.vantage.sh/aws/ec2/g4dn.2xlarge'),
+ ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+ ],
+}
+
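+# Embeddings models follow the same structure.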
+_MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME = {
+ 'bge-large-en': ('huggingface-sentencesimilarity-bge-large-en', 'https://huggingface.co/BAAI/bge-large-en'),
+ 'bge-base-en': ('huggingface-sentencesimilarity-bge-base-en', 'https://huggingface.co/BAAI/bge-base-en'),
+ 'gte-large': ('huggingface-sentencesimilarity-gte-large', 'https://huggingface.co/thenlper/gte-large'),
+    'gte-base': ('huggingface-sentencesimilarity-gte-base', 'https://huggingface.co/thenlper/gte-base'),
+ 'e5-large-v2': ('huggingface-sentencesimilarity-e5-large-v2', 'https://huggingface.co/intfloat/e5-large-v2'),
+ 'bge-small-en': ('huggingface-sentencesimilarity-bge-small-en', 'https://huggingface.co/BAAI/bge-small-en'),
+ 'e5-base-v2': ('huggingface-sentencesimilarity-e5-base-v2', 'https://huggingface.co/intfloat/e5-base-v2'),
+ 'multilingual-e5-large': ('huggingface-sentencesimilarity-multilingual-e5-large', 'https://huggingface.co/intfloat/multilingual-e5-large'),
+ 'e5-large': ('huggingface-sentencesimilarity-e5-large', 'https://huggingface.co/intfloat/e5-large'),
+ 'gte-small': ('huggingface-sentencesimilarity-gte-small', 'https://huggingface.co/thenlper/gte-small'),
+ 'e5-base': ('huggingface-sentencesimilarity-e5-base', 'https://huggingface.co/intfloat/e5-base'),
+ 'e5-small-v2': ('huggingface-sentencesimilarity-e5-small-v2', 'https://huggingface.co/intfloat/e5-small-v2'),
+ 'multilingual-e5-base': ('huggingface-sentencesimilarity-multilingual-e5-base', 'https://huggingface.co/intfloat/multilingual-e5-base'),
+ 'all-MiniLM-L6-v2': ('huggingface-sentencesimilarity-all-MiniLM-L6-v2', 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2'),
+}
+
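+# All embeddings models share this single pool of supported instance types.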
+_VALID_EMBEDDINGS_INSTANCE_TYPES = [
+ ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+ ('ml.g5.4xlarge', 'https://instances.vantage.sh/aws/ec2/g5.4xlarge'),
+ ('ml.g5.12xlarge', 'https://instances.vantage.sh/aws/ec2/g5.12xlarge'),
+ ('ml.g5.24xlarge', 'https://instances.vantage.sh/aws/ec2/g5.24xlarge'),
+ ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+ ('ml.p3dn.24xlarge', 'https://instances.vantage.sh/aws/ec2/p3dn.24xlarge'),
+ ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+ ('ml.p3.8xlarge', 'https://instances.vantage.sh/aws/ec2/p3.8xlarge'),
+ ('ml.p3.16xlarge', 'https://instances.vantage.sh/aws/ec2/p3.16xlarge'),
+]
+
+
@click.group()
def llm():
"""
@@ -22,6 +110,91 @@ def llm():
"""
pass
+@llm.command()
+def platforms():
+ """
+    Command to list the supported LLM platforms
+ """
+ logger.info("Platforms:")
+ logger.info(" - OpenAI: https://platform.openai.com/docs/overview")
+ logger.info(" - AWS Sagemaker: https://aws.amazon.com/sagemaker")
+
+@llm.command()
+@click.option(
+ '--all',
+ is_flag=True,
+ show_default=True,
+ default=False,
+ help='Show all LLM models.'
+)
+@click.option(
+ '--chat-completions',
+ is_flag=True,
+ show_default=True,
+ default=False,
+ help='Show chat completions models.'
+)
+@click.option(
+ '--image-creations',
+ is_flag=True,
+ show_default=True,
+ default=False,
+ help='Show image creations models.'
+)
+@click.option(
+ '--embeddings',
+ is_flag=True,
+ show_default=True,
+ default=False,
+ help='Show embeddings models.'
+)
+def sagemaker_models(all, chat_completions, image_creations, embeddings):
+ """
+    Command to list the LLM models available on AWS SageMaker
+ """
+ logger.info(ASCII_LOGO)
+
+ if not any([all, chat_completions, image_creations, embeddings]):
+        logger.error("At least one of --all, --chat-completions, --image-creations or --embeddings must be provided.")
+ sys.exit(-1)
+
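+    # --all is shorthand for enabling all three model categories.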
+ if all:
+ chat_completions, image_creations, embeddings = True, True, True
+
+ logger.info("Available LLM models:\n")
+
+ if chat_completions:
+ logger.info("Chat Completions:")
+ for model_id, (model_name, model_url) in _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME.items():
+ logger.info(" - Model: {}".format(model_id))
+ logger.info(" Model URL: {}".format(model_url))
+ logger.info(" Instance Types:")
+ for instance_type, instance_url in _VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL[model_name]:
+ logger.info(" - Instance Type: {}".format(instance_type))
+ logger.info(" Instance URL: {}".format(instance_url))
+ logger.info("\n")
+
+ if image_creations:
+ logger.info("Image Creations:")
+ for model_id, (model_name, model_url) in _MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME.items():
+ logger.info(" - Model: {}".format(model_id))
+ logger.info(" Model URL: {}".format(model_url))
+ logger.info(" Instance Types:")
+ for instance_type, instance_url in _VALID_INSTANCE_TYPES_PER_IMAGE_CREATIONS_MODEL[model_name]:
+ logger.info(" - Instance Type: {}".format(instance_type))
+ logger.info(" Instance URL: {}".format(instance_url))
+ logger.info("\n")
+
+ if embeddings:
+        logger.info("Embeddings:")
+ for model_id, (model_name, model_url) in _MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME.items():
+ logger.info(" - Model: {}".format(model_id))
+ logger.info(" Model URL: {}".format(model_url))
+ logger.info(" Instance Types:")
+ for instance_type, instance_url in _VALID_EMBEDDINGS_INSTANCE_TYPES:
+ logger.info(" - Instance Type: {}".format(instance_type))
+ logger.info(" Instance URL: {}".format(instance_url))
+
@llm.command()
@click.option(
@@ -104,20 +277,17 @@ def start(
# Default configuration
default_config = {
'chat_completions': {
- 'model': 'meta-textgeneration-llama-2-7b-f',
- 'model_version': '1.*',
+ 'model': 'llama-2-7b',
'instance_type': 'ml.g5.2xlarge',
'num_instances': 1,
},
'image_creations': {
- 'model': 'model-txt2img-stabilityai-stable-diffusion-v2-1-base',
- 'model_version': '1.*',
+ 'model': 'stabilityai-stable-diffusion-v2-1-base',
'instance_type': 'ml.p3.2xlarge',
'num_instances': 1,
},
'embeddings': {
- 'model': 'huggingface-sentencesimilarity-gte-small',
- 'model_version': '1.*',
+ 'model': 'gte-small',
'instance_type': 'ml.g5.2xlarge',
'num_instances': 1,
},
@@ -139,9 +309,27 @@ def start(
}
if chat_completions:
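+        # Validate the configured model id and instance type before deploying; the same checks repeat for image creations and embeddings below.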
+ if default_config['chat_completions']['model'] not in _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME:
+ raise ValueError(
+ "Invalid chat completions model id. Available model ids: {}".format(
+ list(_MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME.keys())
+ )
+ )
+
+        valid_chat_instance_types = [
+            ec2_type for ec2_type, _ in _VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL[
+                _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME[default_config['chat_completions']['model']][0]
+            ]
+        ]
+        if default_config['chat_completions']['instance_type'] not in valid_chat_instance_types:
+            raise ValueError(
+                "Invalid instance type for chat completions model. "
+                "Available instance types: {}".format(valid_chat_instance_types)
+            )
+
chat_endpoint_name, _ = api_cloud.foundation_model_deploy(
- model_id=default_config['chat_completions']['model'],
- model_version=default_config['chat_completions']['model_version'],
+ model_id=_MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME[default_config['chat_completions']['model']][0],
+ model_version='1.*',
num_instances=default_config['chat_completions']['num_instances'],
ec2_type=default_config['chat_completions']['instance_type'],
aws_region=aws_region,
@@ -155,9 +343,27 @@ def start(
logger.info("Chat Completions Endpoint Name: {}".format(chat_endpoint_name))
if image_creations:
+ if default_config['image_creations']['model'] not in _MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME:
+ raise ValueError(
+ "Invalid image creations model id. Available model ids: {}".format(
+ list(_MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME.keys())
+ )
+ )
+
+        valid_image_instance_types = [
+            ec2_type for ec2_type, _ in _VALID_INSTANCE_TYPES_PER_IMAGE_CREATIONS_MODEL[
+                _MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME[default_config['image_creations']['model']][0]
+            ]
+        ]
+        if default_config['image_creations']['instance_type'] not in valid_image_instance_types:
+            raise ValueError(
+                "Invalid instance type for image creations model. "
+                "Available instance types: {}".format(valid_image_instance_types)
+            )
+
image_endpoint_name, _ = api_cloud.foundation_model_deploy(
- model_id=default_config['image_creations']['model'],
- model_version=default_config['image_creations']['model_version'],
+ model_id=_MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME[default_config['image_creations']['model']][0],
+ model_version='1.*',
num_instances=default_config['image_creations']['num_instances'],
ec2_type=default_config['image_creations']['instance_type'],
aws_region=aws_region,
@@ -171,9 +377,23 @@ def start(
logger.info("Image Creations Endpoint Name: {}".format(image_endpoint_name))
if embeddings:
+ if default_config['embeddings']['model'] not in _MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME:
+ raise ValueError(
+ "Invalid embeddings model id. Available model ids: {}".format(
+ list(_MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME.keys())
+ )
+ )
+
+        valid_embeddings_instance_types = [ec2_type for ec2_type, _ in _VALID_EMBEDDINGS_INSTANCE_TYPES]
+        if default_config['embeddings']['instance_type'] not in valid_embeddings_instance_types:
+            raise ValueError(
+                "Invalid instance type for embeddings model. "
+                "Available instance types: {}".format(valid_embeddings_instance_types)
+            )
+
embeddings_endpoint_name, _ = api_cloud.foundation_model_deploy(
- model_id=default_config['embeddings']['model'],
- model_version=default_config['embeddings']['model_version'],
+ model_id=_MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME[default_config['embeddings']['model']][0],
+ model_version='1.*',
num_instances=default_config['embeddings']['num_instances'],
ec2_type=default_config['embeddings']['instance_type'],
aws_region=aws_region,
@@ -289,6 +509,17 @@ def stop(
logger.info("{}".format(e))
sys.exit(-1)
+@llm.command()
+def start_local_gateway():
+ """
+    Command to start the LLM gateway locally
+ """
+ logger.info(ASCII_LOGO)
+ logger.info("Starting local gateway...\n")
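+    # Imported lazily so the gateway's web-server dependencies are only loaded when this command runs.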
+ from sagify.llm_gateway.main import start_server
+ start_server()
+llm.add_command(platforms)
+llm.add_command(sagemaker_models)
llm.add_command(start)
llm.add_command(stop)
diff --git a/sagify/llm_gateway/main.py b/sagify/llm_gateway/main.py
index 31d4d96..19f993b 100644
--- a/sagify/llm_gateway/main.py
+++ b/sagify/llm_gateway/main.py
@@ -14,6 +14,8 @@
app.include_router(api_router)
app.add_exception_handler(InternalServerError, internal_server_error_handler)
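+# Also called by the sagify CLI (sagify/commands/llm.py) to run the gateway locally on 0.0.0.0:8080.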
+def start_server():
+ uvicorn.run("sagify.llm_gateway.main:app", port=8080, host="0.0.0.0")
if __name__ == "__main__":
- uvicorn.run("sagify.llm_gateway.main:app", port=8080, host="0.0.0.0")
+ start_server()