From 55122880e07305a5d13aad1726fc8b31d5840cbf Mon Sep 17 00:00:00 2001 From: pm3310 Date: Fri, 19 Jan 2024 21:37:46 +0000 Subject: [PATCH] LLM infra commands --- README.md | 70 ++++++++ docs/index.md | 70 ++++++++ sagify/__main__.py | 2 + sagify/commands/llm.py | 247 +++++++++++++++++++++++++++ sagify/sagemaker/sagemaker.py | 7 + tests/commands/test_llm.py | 307 ++++++++++++++++++++++++++++++++++ 6 files changed, 703 insertions(+) create mode 100644 sagify/commands/llm.py create mode 100644 tests/commands/test_llm.py diff --git a/README.md b/README.md index 1b7c272..8fd6aa9 100644 --- a/README.md +++ b/README.md @@ -967,3 +967,73 @@ This command deploys a Foundation model without code. `--external-id EXTERNAL_ID` or `-x EXTERNAL_ID`: Optional external id used when using an IAM role `--endpoint-name ENDPOINT_NAME`: Optional name for the SageMaker endpoint + + +### LLM Start Infrastructure + +#### Name + +Command to start LLM infrastructure + +#### Synopsis +```sh +sagify llm start [--all] [--chat-completions] [--image-creations] [--embeddings] [--config EC2_CONFIG_FILE] --aws-profile AWS_PROFILE --aws-region AWS_REGION [--aws-tags TAGS] [--iam-role-arn IAM_ROLE] [--external-id EXTERNAL_ID] +``` + +#### Description + +It spins up the endpoints for chat completions, image creation and embeddings. + +#### Required Flags + +`--all`: Start infrastructure for all services. + +`--chat-completions`: Start infrastructure for chat completions. + +`--image-creations`: Start infrastructure for image creations. + +`--embeddings`: Start infrastructure for embeddings. + +`--config EC2_CONFIG_FILE`: Path to config file to override foundation models, ec2 instance types and/or number of instances. 
+ +`--aws-profile AWS_PROFILE`: The AWS profile to use for the llm start command + +`--aws-region AWS_REGION`: The AWS region to use for the llm start command + +#### Optional Flags + +`--aws-tags TAGS` or `-a TAGS`: Tags for labeling a training job of the form `tag1=value1;tag2=value2`. For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. + +`--iam-role-arn IAM_ROLE` or `-r IAM_ROLE`: AWS IAM role to use for deploying with *SageMaker* + +`--external-id EXTERNAL_ID` or `-x EXTERNAL_ID`: Optional external id used when using an IAM role + + +### LLM Stop Infrastructure + +#### Name + +Command to stop LLM infrastructure + +#### Synopsis +```sh +sagify llm stop --aws-profile AWS_PROFILE --aws-region AWS_REGION [--aws-tags TAGS] [--iam-role-arn IAM_ROLE] [--external-id EXTERNAL_ID] +``` + +#### Description + +It stops all the services that are running. + +#### Required Flags + +`--aws-profile AWS_PROFILE`: The AWS profile to use for the llm stop command + +`--aws-region AWS_REGION`: The AWS region to use for the llm stop command + +#### Optional Flags + +`--aws-tags TAGS` or `-a TAGS`: Tags for labeling a training job of the form `tag1=value1;tag2=value2`. For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. + +`--iam-role-arn IAM_ROLE` or `-r IAM_ROLE`: AWS IAM role to use for deploying with *SageMaker* + +`--external-id EXTERNAL_ID` or `-x EXTERNAL_ID`: Optional external id used when using an IAM role \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index efcdb81..6ed8a34 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1674,3 +1674,73 @@ This command deploys a Foundation model without code. 
`--external-id EXTERNAL_ID` or `-x EXTERNAL_ID`: Optional external id used when using an IAM role + +`--endpoint-name ENDPOINT_NAME`: Optional name for the SageMaker endpoint + + +### LLM Start Infrastructure + +#### Name + +Command to start LLM infrastructure + +#### Synopsis +```sh +sagify llm start [--all] [--chat-completions] [--image-creations] [--embeddings] [--config EC2_CONFIG_FILE] --aws-profile AWS_PROFILE --aws-region AWS_REGION [--aws-tags TAGS] [--iam-role-arn IAM_ROLE] [--external-id EXTERNAL_ID] +``` + +#### Description + +It spins up the endpoints for chat completions, image creation and embeddings. + +#### Required Flags + +`--all`: Start infrastructure for all services. + +`--chat-completions`: Start infrastructure for chat completions. + +`--image-creations`: Start infrastructure for image creations. + +`--embeddings`: Start infrastructure for embeddings. + +`--config EC2_CONFIG_FILE`: Path to config file to override foundation models, ec2 instance types and/or number of instances. + +`--aws-profile AWS_PROFILE`: The AWS profile to use for the llm start command + +`--aws-region AWS_REGION`: The AWS region to use for the llm start command + +#### Optional Flags + +`--aws-tags TAGS` or `-a TAGS`: Tags for labeling a training job of the form `tag1=value1;tag2=value2`. For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. + +`--iam-role-arn IAM_ROLE` or `-r IAM_ROLE`: AWS IAM role to use for deploying with *SageMaker* + +`--external-id EXTERNAL_ID` or `-x EXTERNAL_ID`: Optional external id used when using an IAM role + + +### LLM Stop Infrastructure + +#### Name + +Command to stop LLM infrastructure + +#### Synopsis +```sh +sagify llm stop --aws-profile AWS_PROFILE --aws-region AWS_REGION [--aws-tags TAGS] [--iam-role-arn IAM_ROLE] [--external-id EXTERNAL_ID] +``` + +#### Description + +It stops all the services that are running. 
+ +#### Required Flags + +`--aws-profile AWS_PROFILE`: The AWS profile to use for the lightning deploy command + +`--aws-region AWS_REGION`: The AWS region to use for the lightning deploy command + +#### Optional Flags + +`--aws-tags TAGS` or `-a TAGS`: Tags for labeling a training job of the form `tag1=value1;tag2=value2`. For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. + +`--iam-role-arn IAM_ROLE` or `-r IAM_ROLE`: AWS IAM role to use for deploying with *SageMaker* + +`--external-id EXTERNAL_ID` or `-x EXTERNAL_ID`: Optional external id used when using an IAM role diff --git a/sagify/__main__.py b/sagify/__main__.py index 16e2748..67ae5c8 100644 --- a/sagify/__main__.py +++ b/sagify/__main__.py @@ -6,6 +6,7 @@ from sagify.commands.build import build from sagify.commands.cloud import cloud from sagify.commands.initialize import init +from sagify.commands.llm import llm from sagify.commands.local import local from sagify.commands.push import push from sagify.commands.configure import configure @@ -31,6 +32,7 @@ def add_commands(cli): cli.add_command(push) cli.add_command(cloud) cli.add_command(configure) + cli.add_command(llm) add_commands(cli) diff --git a/sagify/commands/llm.py b/sagify/commands/llm.py new file mode 100644 index 0000000..65b51bf --- /dev/null +++ b/sagify/commands/llm.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function, unicode_literals + +import json +import sys + +import click + +from sagify.api import cloud as api_cloud +from sagify.commands import ASCII_LOGO +from sagify.commands.custom_validators.validators import validate_tags +from sagify.log import logger +from sagify.config.config import ConfigManager +from sagify.sagemaker import sagemaker + +click.disable_unicode_literals_warning = True + + +@click.group() +def llm(): + """ + Commands for LLM (Large Language Model) operations + """ + pass + + +@llm.command() +@click.option( + '--all', + is_flag=True, + show_default=True, + 
default=False, + help='Start infrastructure for all services.' +) +@click.option( + '--chat-completions', + is_flag=True, + show_default=True, + default=False, + help='Start infrastructure for chat completions.' +) +@click.option( + '--image-creations', + is_flag=True, + show_default=True, + default=False, + help='Start infrastructure for image creations.' +) +@click.option( + '--embeddings', + is_flag=True, + show_default=True, + default=False, + help='Start infrastructure for embeddings.' +) +@click.option('--config', required=False, type=click.File('r'), help='Path to config file.') +@click.option( + u"-a", u"--aws-tags", + callback=validate_tags, + required=False, + default=None, + help='Tags for labeling a training job of the form "tag1=value1;tag2=value2". For more, see ' + 'https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.' +) +@click.option( + u"--aws-profile", + required=True, + help="The AWS profile to use for the foundation model deploy command" +) +@click.option( + u"--aws-region", + required=True, + help="The AWS region to use for the foundation model deploy command" +) +@click.option( + u"-r", + u"--iam-role-arn", + required=False, + help="The AWS role to use for the foundation model deploy command" +) +@click.option( + u"-x", + u"--external-id", + required=False, + help="Optional external id used when using an IAM role" +) +def start( + all, + chat_completions, + image_creations, + embeddings, + config, + aws_tags, + aws_profile, + aws_region, + iam_role_arn, + external_id +): + """ + Command to start LLM infrastructure + """ + logger.info(ASCII_LOGO) + logger.info("Starting LLM infrastructure. 
It will take ~15-30 mins...\n") + + # Default configuration + default_config = { + 'chat_completions': { + 'model': 'meta-textgeneration-llama-2-7b-f', + 'model_version': '1.*', + 'instance_type': 'ml.g5.2xlarge', + 'num_instances': 1, + }, + 'image_creations': { + 'model': 'model-txt2img-stabilityai-stable-diffusion-v2-1-base', + 'model_version': '1.*', + 'instance_type': 'ml.p3.2xlarge', + 'num_instances': 1, + }, + 'embeddings': { + 'model': 'huggingface-sentencesimilarity-gte-small', + 'model_version': '1.*', + 'instance_type': 'ml.g5.2xlarge', + 'num_instances': 1, + }, + } + + # Load the config file if provided + if config: + custom_config = json.load(config) + default_config.update(custom_config) + + try: + if all: + chat_completions, image_creations, embeddings = True, True, True + + llm_infra_config = { + 'chat_completions_endpoint': None, + 'image_creations_endpoint': None, + 'embeddings_endpoint': None, + } + + if chat_completions: + chat_endpoint_name, _ = api_cloud.foundation_model_deploy( + model_id=default_config['chat_completions']['model'], + model_version=default_config['chat_completions']['model_version'], + num_instances=default_config['chat_completions']['num_instances'], + ec2_type=default_config['chat_completions']['instance_type'], + aws_region=aws_region, + aws_profile=aws_profile, + aws_role=iam_role_arn, + external_id=external_id, + tags=aws_tags + ) + llm_infra_config['chat_completions_endpoint'] = chat_endpoint_name + + logger.info("Chat Completions Endpoint Name: {}".format(chat_endpoint_name)) + + if image_creations: + image_endpoint_name, _ = api_cloud.foundation_model_deploy( + model_id=default_config['image_creations']['model'], + model_version=default_config['image_creations']['model_version'], + num_instances=default_config['image_creations']['num_instances'], + ec2_type=default_config['image_creations']['instance_type'], + aws_region=aws_region, + aws_profile=aws_profile, + aws_role=iam_role_arn, + external_id=external_id, + 
tags=aws_tags + ) + llm_infra_config['image_creations_endpoint'] = image_endpoint_name + + logger.info("Image Creations Endpoint Name: {}".format(image_endpoint_name)) + + if embeddings: + embeddings_endpoint_name, _ = api_cloud.foundation_model_deploy( + model_id=default_config['embeddings']['model'], + model_version=default_config['embeddings']['model_version'], + num_instances=default_config['embeddings']['num_instances'], + ec2_type=default_config['embeddings']['instance_type'], + aws_region=aws_region, + aws_profile=aws_profile, + aws_role=iam_role_arn, + external_id=external_id, + tags=aws_tags + ) + llm_infra_config['embeddings_endpoint'] = embeddings_endpoint_name + + logger.info("Embeddings Endpoint Name: {}".format(embeddings_endpoint_name)) + + with open('.sagify_llm_infra.json', 'w') as f: + json.dump(llm_infra_config, f) + except ValueError as e: + logger.info("{}".format(e)) + sys.exit(-1) + + +@click.command() +@click.option( + u"--aws-profile", + required=True, + help="The AWS profile to use for the foundation model deploy command" +) +@click.option( + u"--aws-region", + required=True, + help="The AWS region to use for the foundation model deploy command" +) +@click.option( + u"-r", + u"--iam-role-arn", + required=False, + help="The AWS role to use for the train command" +) +@click.option( + u"-x", + u"--external-id", + required=False, + help="Optional external id used when using an IAM role" +) +def stop(aws_profile, aws_region, iam_role_arn, external_id): + """ + Command to stop LLM infrastructure + """ + logger.info(ASCII_LOGO) + logger.info("Stopping LLM infrastructure...\n") + + sagemaker_client = sagemaker.SageMakerClient(aws_profile, aws_region, iam_role_arn, external_id) + try: + with open('.sagify_llm_infra.json', 'r') as f: + llm_infra_config = json.load(f) + + for _endpoint in ['chat_completions_endpoint', 'image_creations_endpoint', 'embeddings_endpoint']: + if llm_infra_config[_endpoint]: + try: + 
sagemaker_client.shutdown_endpoint(llm_infra_config[_endpoint]) + except Exception as e: + logger.info("{}".format(e)) + sys.exit(-1) + + logger.info("LLM infrastructure stopped successfully.") + except FileNotFoundError as e: + logger.info("{}".format(e)) + sys.exit(-1) + + +llm.add_command(start) +llm.add_command(stop) diff --git a/sagify/sagemaker/sagemaker.py b/sagify/sagemaker/sagemaker.py index 3f051ca..8e470c9 100644 --- a/sagify/sagemaker/sagemaker.py +++ b/sagify/sagemaker/sagemaker.py @@ -728,6 +728,13 @@ def query_endpoint(model_predictor, payload, content_type, accept): return example_query_code_snippet + def shutdown_endpoint(self, endpoint_name): + """ + Shuts down a SageMaker endpoint. + :param endpoint_name: [str], name of the endpoint to be shut down + """ + self.sagemaker_client.delete_endpoint(EndpointName=endpoint_name) + @staticmethod def _get_s3_bucket(s3_dir): """ diff --git a/tests/commands/test_llm.py b/tests/commands/test_llm.py new file mode 100644 index 0000000..b1d09d6 --- /dev/null +++ b/tests/commands/test_llm.py @@ -0,0 +1,307 @@ +# -*- coding: utf-8 -*- +import json +import os +try: + from unittest.mock import patch, call +except ImportError: + from mock import patch, call + +from click.testing import CliRunner +from sagify.__main__ import cli + + +class TestLlmStart(object): + def test_start_all_happy_case(self): + runner = CliRunner() + with patch( + 'sagify.api.cloud.foundation_model_deploy' + ) as mocked_foundation_model_deploy: + mocked_foundation_model_deploy.side_effect = [ + ('chat_completions_endpoint', 'some code snippet 1'), + ('image_creations_endpoint', 'some code snippet 2'), + ('embeddings_endpoint', 'some code snippet 3'), + ] + with runner.isolated_filesystem(): + result = runner.invoke( + cli=cli, + args=[ + 'llm', 'start', + '--all', + '--aws-region', 'us-east-1', + '--aws-profile', 'sagemaker-production' + ] + ) + + assert mocked_foundation_model_deploy.call_count == 3 + 
# -*- coding: utf-8 -*-
"""Tests for the `sagify llm start` / `sagify llm stop` CLI commands."""
import json
import os
try:
    from unittest.mock import patch, call
except ImportError:
    from mock import patch, call

from click.testing import CliRunner
from sagify.__main__ import cli

# Deployment kwargs common to every expected foundation_model_deploy call.
_COMMON_DEPLOY_KWARGS = dict(
    aws_region='us-east-1',
    aws_profile='sagemaker-production',
    aws_role=None,
    external_id=None,
    tags=None,
)


def _read_infra_file():
    """Load the infra file `llm start` is expected to write."""
    with open('.sagify_llm_infra.json', 'r') as f:
        return json.load(f)


class TestLlmStart(object):
    def test_start_all_happy_case(self):
        runner = CliRunner()
        with patch('sagify.api.cloud.foundation_model_deploy') as deploy_mock:
            deploy_mock.side_effect = [
                ('chat_completions_endpoint', 'some code snippet 1'),
                ('image_creations_endpoint', 'some code snippet 2'),
                ('embeddings_endpoint', 'some code snippet 3'),
            ]
            with runner.isolated_filesystem():
                result = runner.invoke(
                    cli=cli,
                    args=[
                        'llm', 'start',
                        '--all',
                        '--aws-region', 'us-east-1',
                        '--aws-profile', 'sagemaker-production'
                    ]
                )

                assert deploy_mock.call_count == 3
                deploy_mock.assert_has_calls([
                    call(
                        model_id='meta-textgeneration-llama-2-7b-f',
                        model_version='1.*',
                        num_instances=1,
                        ec2_type='ml.g5.2xlarge',
                        **_COMMON_DEPLOY_KWARGS
                    ),
                    call(
                        model_id='model-txt2img-stabilityai-stable-diffusion-v2-1-base',
                        model_version='1.*',
                        num_instances=1,
                        ec2_type='ml.p3.2xlarge',
                        **_COMMON_DEPLOY_KWARGS
                    ),
                    call(
                        model_id='huggingface-sentencesimilarity-gte-small',
                        model_version='1.*',
                        num_instances=1,
                        ec2_type='ml.g5.2xlarge',
                        **_COMMON_DEPLOY_KWARGS
                    ),
                ])

                assert os.path.isfile('.sagify_llm_infra.json')
                infra = _read_infra_file()
                assert infra['chat_completions_endpoint'] is not None
                assert infra['image_creations_endpoint'] is not None
                assert infra['embeddings_endpoint'] is not None

            assert result.exit_code == 0

    def test_start_chat_completions_only(self):
        runner = CliRunner()
        with patch('sagify.api.cloud.foundation_model_deploy') as deploy_mock:
            deploy_mock.side_effect = [
                ('chat_completions_endpoint', 'some code snippet 1')
            ]
            with runner.isolated_filesystem():
                result = runner.invoke(
                    cli=cli,
                    args=[
                        'llm', 'start',
                        '--chat-completions',
                        '--aws-region', 'us-east-1',
                        '--aws-profile', 'sagemaker-production'
                    ]
                )

                assert deploy_mock.call_count == 1
                deploy_mock.assert_called_with(
                    model_id='meta-textgeneration-llama-2-7b-f',
                    model_version='1.*',
                    num_instances=1,
                    ec2_type='ml.g5.2xlarge',
                    **_COMMON_DEPLOY_KWARGS
                )

                assert os.path.isfile('.sagify_llm_infra.json')
                infra = _read_infra_file()
                assert infra['chat_completions_endpoint'] is not None
                assert infra['image_creations_endpoint'] is None
                assert infra['embeddings_endpoint'] is None

            assert result.exit_code == 0

    def test_start_image_creations_only(self):
        runner = CliRunner()
        with patch('sagify.api.cloud.foundation_model_deploy') as deploy_mock:
            deploy_mock.side_effect = [
                ('image_creations_endpoint', 'some code snippet 2')
            ]
            with runner.isolated_filesystem():
                result = runner.invoke(
                    cli=cli,
                    args=[
                        'llm', 'start',
                        '--image-creations',
                        '--aws-region', 'us-east-1',
                        '--aws-profile', 'sagemaker-production'
                    ]
                )

                assert deploy_mock.call_count == 1
                deploy_mock.assert_called_with(
                    model_id='model-txt2img-stabilityai-stable-diffusion-v2-1-base',
                    model_version='1.*',
                    num_instances=1,
                    ec2_type='ml.p3.2xlarge',
                    **_COMMON_DEPLOY_KWARGS
                )

                assert os.path.isfile('.sagify_llm_infra.json')
                infra = _read_infra_file()
                assert infra['chat_completions_endpoint'] is None
                assert infra['image_creations_endpoint'] is not None
                assert infra['embeddings_endpoint'] is None

            assert result.exit_code == 0

    def test_start_embeddings_only(self):
        runner = CliRunner()
        with patch('sagify.api.cloud.foundation_model_deploy') as deploy_mock:
            deploy_mock.side_effect = [
                ('embeddings_endpoint', 'some code snippet 3')
            ]
            with runner.isolated_filesystem():
                result = runner.invoke(
                    cli=cli,
                    args=[
                        'llm', 'start',
                        '--embeddings',
                        '--aws-region', 'us-east-1',
                        '--aws-profile', 'sagemaker-production'
                    ]
                )

                assert deploy_mock.call_count == 1
                deploy_mock.assert_called_with(
                    model_id='huggingface-sentencesimilarity-gte-small',
                    model_version='1.*',
                    num_instances=1,
                    ec2_type='ml.g5.2xlarge',
                    **_COMMON_DEPLOY_KWARGS
                )

                assert os.path.isfile('.sagify_llm_infra.json')
                infra = _read_infra_file()
                assert infra['chat_completions_endpoint'] is None
                assert infra['image_creations_endpoint'] is None
                assert infra['embeddings_endpoint'] is not None

            assert result.exit_code == 0


class TestLlmStop(object):
    _STOP_ARGS = [
        'llm', 'stop',
        '--aws-region', 'us-east-1',
        '--aws-profile', 'sagemaker-production',
        '--iam-role-arn', 'arn:aws:iam::123456789012:role/MyRole',
        '--external-id', '123456'
    ]

    @staticmethod
    def _write_infra_file():
        with open('.sagify_llm_infra.json', 'w') as f:
            json.dump({
                'chat_completions_endpoint': 'endpoint1',
                'image_creations_endpoint': 'endpoint2',
                'embeddings_endpoint': 'endpoint3'
            }, f)

    def test_stop_happy_case(self):
        runner = CliRunner()
        with patch('sagify.commands.llm.sagemaker.SageMakerClient') as client_mock:
            with runner.isolated_filesystem():
                self._write_infra_file()

                result = runner.invoke(cli=cli, args=self._STOP_ARGS)

                client_mock.assert_called_with(
                    'sagemaker-production', 'us-east-1', 'arn:aws:iam::123456789012:role/MyRole', '123456'
                )
                shutdown = client_mock.return_value.shutdown_endpoint
                assert shutdown.call_count == 3
                shutdown.assert_has_calls([
                    call('endpoint1'),
                    call('endpoint2'),
                    call('endpoint3')
                ])

            assert result.exit_code == 0

    def test_stop_missing_config_file(self):
        runner = CliRunner()
        with patch('sagify.commands.llm.sagemaker.SageMakerClient') as client_mock:
            with runner.isolated_filesystem():
                # No .sagify_llm_infra.json written: stop must bail out.
                result = runner.invoke(cli=cli, args=self._STOP_ARGS)

                assert client_mock.return_value.shutdown_endpoint.call_count == 0
                assert result.exit_code == -1

    def test_stop_endpoint_shutdown_error(self):
        runner = CliRunner()
        with patch('sagify.commands.llm.sagemaker.SageMakerClient') as client_mock:
            client_mock.return_value.shutdown_endpoint.side_effect = Exception('Endpoint shutdown error')
            with runner.isolated_filesystem():
                self._write_infra_file()

                result = runner.invoke(cli=cli, args=self._STOP_ARGS)

                assert client_mock.return_value.shutdown_endpoint.call_count == 1
                client_mock.return_value.shutdown_endpoint.assert_called_with('endpoint1')

                assert result.exit_code == -1