Commit 39083d3
[stabilityai_pytorch][inference] Stability AI Inference DLC (aws#3195)
Co-authored-by: arjkesh <[email protected]>
Co-authored-by: Shantanu Tripathi <[email protected]>
3 people authored Aug 3, 2023
1 parent 7264fac commit 39083d3
Showing 14 changed files with 444 additions and 41 deletions.
1 change: 1 addition & 0 deletions src/image_builder.py
@@ -82,6 +82,7 @@ def image_builder(buildspec, image_types=[], device_types=[]):
    if (
        "huggingface" in str(BUILDSPEC["framework"])
        or "autogluon" in str(BUILDSPEC["framework"])
        or "stabilityai" in str(BUILDSPEC["framework"])
        or "trcomp" in str(BUILDSPEC["framework"])
    ):
        os.system("echo login into public ECR")
34 changes: 5 additions & 29 deletions stabilityai/pytorch/inference/buildspec.yml
@@ -9,40 +9,17 @@ arch_type: x86
repository_info:
  inference_repository: &INFERENCE_REPOSITORY
    image_type: &INFERENCE_IMAGE_TYPE inference
    root: !join [ *BASE_FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ]
    root: !join [ "stabilityai/", *BASE_FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ]
    repository_name: &REPOSITORY_NAME !join [pr, "-", "stabilityai", "-", *BASE_FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE]
    repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]

context:
  inference_context: &INFERENCE_CONTEXT
    torchserve-ec2-entrypoint:
      source: docker/build_artifacts/torchserve-ec2-entrypoint.py
      target: torchserve-ec2-entrypoint.py
    torchserve-entrypoint:
      source: docker/build_artifacts/torchserve-entrypoint.py
      source: docker/build_artifacts/torchserve-stabilityai-entrypoint.py
      target: torchserve-entrypoint.py
    config:
      source: docker/build_artifacts/config.properties
      target: config.properties
    deep_learning_container:
      source: ../../src/deep_learning_container.py
      target: deep_learning_container.py

images:
  BuildStabilityaiPytorchCpuPy310InferenceDockerImage:
    <<: *INFERENCE_REPOSITORY
    build: &STABILITYAI_PYTORCH_CPU_INFERENCE_PY3 false
    image_size_baseline: 4900
    device_type: &DEVICE_TYPE cpu
    python_version: &DOCKER_PYTHON_VERSION py3
    tag_python_version: &TAG_PYTHON_VERSION py310
    os_version: &OS_VERSION ubuntu20.04
    diffusers_version: &DIFFUSERS_VERSION 1.2.3
    tag: !join [ *VERSION, "-", 'diffusers',*DIFFUSERS_VERSION, '-', *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
    docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
    target: sagemaker
    context:
      <<: *INFERENCE_CONTEXT
  BuildStabilityaiPytorchGpuPy310InferenceDockerImage:
    <<: *INFERENCE_REPOSITORY
    build: &STABILITYAI_PYTORCH_GPU_INFERENCE_PY3 false
@@ -52,10 +29,9 @@ images:
    tag_python_version: &TAG_PYTHON_VERSION py310
    cuda_version: &CUDA_VERSION cu118
    os_version: &OS_VERSION ubuntu20.04
    diffusers_version: &DIFFUSERS_VERSION 1.2.3
    tag: !join [ *VERSION, "-", 'diffusers',*DIFFUSERS_VERSION, '-', *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
    sgm_version: &SGM_VERSION 0.1.0
    tag: !join [ *VERSION, "-", 'sgm',*SGM_VERSION, '-', *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
    docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile.,
        *DEVICE_TYPE ]
    target: sagemaker
    context:
      <<: *INFERENCE_CONTEXT
      <<: *INFERENCE_CONTEXT
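Note on the buildspec syntax: the custom !join YAML tag concatenates the scalars in a flow sequence (resolving anchors such as *VERSION first) to assemble repository roots and image tags. As a hedged illustration only — the DLC build code registers its own constructor, and the version values below are assumptions — a minimal PyYAML sketch of the behavior:

# Minimal sketch of a "!join" constructor; the real DLC loader may differ.
import yaml

def join_constructor(loader, node):
    # Resolve anchors/aliases in the flow sequence, then concatenate as strings.
    return "".join(str(part) for part in loader.construct_sequence(node))

yaml.SafeLoader.add_constructor("!join", join_constructor)

snippet = """
version: &VERSION 2.0.1
sgm_version: &SGM_VERSION 0.1.0
tag: !join [ *VERSION, "-", 'sgm', *SGM_VERSION, "-gpu-py310-cu118-ubuntu20.04-sagemaker" ]
"""
print(yaml.safe_load(snippet)["tag"])
# -> 2.0.1-sgm0.1.0-gpu-py310-cu118-ubuntu20.04-sagemaker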
47 changes: 47 additions & 0 deletions stabilityai/pytorch/inference/docker/2.0/py3/cu118/Dockerfile.gpu
@@ -0,0 +1,47 @@
FROM 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:2.0.1-gpu-py310-cu118-ubuntu20.04-sagemaker

LABEL dlc_major_version="1"
ARG PYTHON=python3
ARG XFORMERS_VERSION=0.0.20

# xformers must be installed from source due to the older version of python in the DLC
RUN pip install ninja \
&& pip install -v -U git+https://github.com/facebookresearch/xformers.git@v${XFORMERS_VERSION}#egg=xformers

ARG SGM_VERSION=0.1.0

# Install Stability Generative Models; at the moment the wheel install does not work, so we need the full repo
RUN cd /tmp \
&& git clone https://github.com/stability-ai/generative-models -b ${SGM_VERSION} \
&& cd generative-models \
&& pip install -r requirements/pt2.txt \
&& pip install . \
&& rm -rf /tmp/generative-models

# Resolve pip check conflicts and other issues
RUN pip install --no-cache-dir -U \
"awscli>=1.29.15" \
"boto3>=1.28.15" \
"certifi>=2023.07.22" \
"pyopenssl>=23.2.0" \
"cryptography>=41.0.2" \
"transformers>=4.23.0"

# Configure Torchserve for large model loading
ENV TS_DEFAULT_RESPONSE_TIMEOUT=1000

# Copy custom entrypoint, which can unpack cache files
ENV HUGGINGFACE_HUB_CACHE=/tmp/cache/huggingface/hub
ENV TRANSFORMERS_CACHE=/tmp/cache/huggingface/transformers
COPY torchserve-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
RUN mkdir -p /tmp/cache/huggingface \
&& chmod +x /usr/local/bin/dockerd-entrypoint.py

RUN HOME_DIR=/root \
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
&& chmod +x /usr/local/bin/testOSSCompliance \
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
&& rm -rf ${HOME_DIR}/oss_compliance*
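Not part of the commit, but a quick way to sanity-check the resulting image is to import the freshly installed packages inside the container. A minimal sketch, assuming the generative-models repo installs its package under the name sgm:

# Hypothetical smoke test; run inside the container as: python smoke_test.py
import torch
import xformers
import sgm  # assumption: package name installed by stability-ai/generative-models

print("torch:", torch.__version__)
print("xformers:", xformers.__version__)
print("CUDA available:", torch.cuda.is_available())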
48 changes: 48 additions & 0 deletions stabilityai/pytorch/inference/docker/build_artifacts/torchserve-stabilityai-entrypoint.py
@@ -0,0 +1,48 @@
# Copyright 2019-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import os
import shlex
import subprocess
import sys

from sagemaker_inference import environment

SAI_MODEL_CACHE_FILE = os.path.join(
    environment.model_dir, os.getenv("SAI_MODEL_CACHE_FILE", "stabilityai-model-cache.tar")
)
SAI_MODEL_CACHE_PATH = os.getenv("SAI_MODEL_CACHE_PATH", "/tmp/cache")
SAI_MODEL_CACHE_STATUS_FILE = os.path.join(SAI_MODEL_CACHE_PATH, ".model-cache-unpacked")
if os.path.exists(SAI_MODEL_CACHE_FILE) and not os.path.exists(SAI_MODEL_CACHE_STATUS_FILE):
    # Build the argument list conditionally: an empty-string placeholder would be
    # passed to tar as a (nonexistent) member name and make extraction fail.
    tar_cmd = ["tar", "-x", "-f", SAI_MODEL_CACHE_FILE, "-C", SAI_MODEL_CACHE_PATH]
    if SAI_MODEL_CACHE_FILE.endswith(".gz"):
        tar_cmd.insert(2, "-z")
    subprocess.check_call(tar_cmd)

if sys.argv[1] == "serve":
    from sagemaker_pytorch_serving_container import serving

    serving.main()
else:
    subprocess.check_call(shlex.split(" ".join(sys.argv[1:])))

# prevent docker exit
subprocess.call(["tail", "-f", "/dev/null"])
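For context on how this entrypoint is meant to be fed (an assumption about usage, not part of the commit): the model artifact placed in environment.model_dir should contain a stabilityai-model-cache.tar whose members extract under /tmp/cache. A minimal packaging sketch with Python's tarfile module, with all paths assumed for illustration:

# Hypothetical model-preparation step; paths and layout are assumptions.
import os
import tarfile

cache_root = "/tmp/cache"   # mirrors SAI_MODEL_CACHE_PATH in the entrypoint above
model_dir = "model"         # local staging dir that becomes SageMaker's model_dir

os.makedirs(model_dir, exist_ok=True)
with tarfile.open(os.path.join(model_dir, "stabilityai-model-cache.tar"), "w") as tar:
    # Archive the Hugging Face cache so it lands at /tmp/cache/huggingface/... on extract.
    tar.add(os.path.join(cache_root, "huggingface"), arcname="huggingface")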
10 changes: 8 additions & 2 deletions test/dlc_tests/container_tests/bin/security_checks.py
@@ -3,15 +3,19 @@
import os
import time
import calendar
import argparse

LOGGER = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--image_uri", help="Provide Image Uri", default="")
    args = parser.parse_args()
    home_dir = os.path.expanduser("~")
    check_that_cache_dir_is_removed(home_dir)
    check_that_global_tmp_dir_is_empty(image_uri=args.image_uri)
    check_vim_info_does_not_exists(home_dir)
    check_bash_history(home_dir)
    check_if_any_files_in_subfolder_with_mask_was_last_modified_before_the_boottime(
@@ -49,7 +53,7 @@ def check_that_cache_dir_is_removed(home_dir):
        )


def check_that_global_tmp_dir_is_empty(image_uri=""):
    global_tmp_dir_path = "/tmp/"
    global_tmp_dir_content = [f for f in os.listdir(global_tmp_dir_path)]
    for f in global_tmp_dir_content:
@@ -60,6 +64,8 @@ def check_that_global_tmp_dir_is_empty():
and "ccNPSUr9.s" not in f
and "hsperfdata" not in f
):
if "stabilityai" in image_uri and "cache" in f.lower():
continue
raise ValueError(
"/tmp folder includes file that probably should not be there: {}".format(f)
)
@@ -133,7 +133,7 @@ def test_ec2_pytorch_inference_eia_gpu(


@pytest.mark.usefixtures("feature_torchaudio_present")
@pytest.mark.usefixtures("sagemaker")
@pytest.mark.usefixtures("sagemaker", "stabilityai")
@pytest.mark.integration("pt_torchaudio_gpu")
@pytest.mark.model("N/A")
@pytest.mark.parametrize("ec2_instance_type", PT_EC2_GPU_INSTANCE_TYPE, indirect=True)
@@ -163,7 +163,7 @@ def test_pytorch_inference_torchaudio_cpu(pytorch_inference, ec2_connection, cpu


@pytest.mark.usefixtures("feature_torchdata_present")
@pytest.mark.usefixtures("sagemaker")
@pytest.mark.usefixtures("sagemaker", "stabilityai")
@pytest.mark.integration("pt_torchdata_gpu")
@pytest.mark.model("N/A")
@pytest.mark.parametrize("ec2_instance_type", PT_EC2_GPU_INSTANCE_TYPE, indirect=True)
@@ -246,7 +246,7 @@ def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
ec2_connection.run(f"docker rm -f {container_name}", warn=True, hide=True)


@pytest.mark.usefixtures("sagemaker")
@pytest.mark.usefixtures("sagemaker", "stabilityai")
@pytest.mark.integration("telemetry")
@pytest.mark.model("N/A")
@pytest.mark.parametrize("ec2_instance_type", PT_EC2_SINGLE_GPU_INSTANCE_TYPE, indirect=True)
2 changes: 1 addition & 1 deletion test/dlc_tests/sanity/test_boottime_container_security.py
@@ -20,6 +20,6 @@ def test_security(image):
    )
    try:
        docker_exec_cmd = f"docker exec -i {container_name}"
        run(f"{docker_exec_cmd} python /test/bin/security_checks.py ", hide=True)
        run(f"{docker_exec_cmd} python /test/bin/security_checks.py --image_uri {image}", hide=True)
    finally:
        run(f"docker rm -f {container_name}", hide=True)
6 changes: 5 additions & 1 deletion test/dlc_tests/sanity/test_pre_release.py
@@ -65,6 +65,10 @@ def test_stray_files(image):
    # Running list of allowed files in the /tmp directory
    allowed_tmp_files = ["hsperfdata_root"]

    # Allow cache dir for SAI images
    if "stabilityai" in image:
        allowed_tmp_files.append("cache")

    # Ensure stray artifacts are not in the tmp directory
    tmp = run_cmd_on_container(container_name, ctx, "ls -A /tmp")
    _assert_artifact_free(tmp, stray_artifacts)
@@ -716,7 +720,7 @@ def test_cuda_paths(gpu):
    python_version = re.search(r"(py\d+)", image).group(1)
    short_python_version = None
    image_tag = re.search(
        r":(\d+(\.\d+){2}(-(transformers|diffusers)\d+(\.\d+){2})?-(gpu)-(py\d+)(-cu\d+)-(ubuntu\d+\.\d+)((-ec2)?-example|-ec2|-sagemaker-lite|-sagemaker-full|-sagemaker)?)",
        r":(\d+(\.\d+){2}(-(transformers|diffusers|sgm)\d+(\.\d+){2})?-(gpu)-(py\d+)(-cu\d+)-(ubuntu\d+\.\d+)((-ec2)?-example|-ec2|-sagemaker-lite|-sagemaker-full|-sagemaker)?)",
        image,
    ).group(1)

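As a quick check that the widened pattern accepts the new sgm tag format defined in the buildspec (the image URI below is a made-up sample):

# Illustrative check of the updated tag regex; the URI is a fabricated example.
import re

PATTERN = r":(\d+(\.\d+){2}(-(transformers|diffusers|sgm)\d+(\.\d+){2})?-(gpu)-(py\d+)(-cu\d+)-(ubuntu\d+\.\d+)((-ec2)?-example|-ec2|-sagemaker-lite|-sagemaker-full|-sagemaker)?)"
image = "123456789012.dkr.ecr.us-west-2.amazonaws.com/pr-stabilityai-pytorch-inference:2.0.1-sgm0.1.0-gpu-py310-cu118-ubuntu20.04-sagemaker"
print(re.search(PATTERN, image).group(1))
# -> 2.0.1-sgm0.1.0-gpu-py310-cu118-ubuntu20.04-sagemaker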
@@ -46,6 +46,11 @@
resnet_neuronx_image_list = os.path.join(model_neuronx_dir, "imagenet1000_clsidx_to_labels.txt")
call_model_fn_once_script = os.path.join(resources_path, code_sub_dir, "call_model_fn_once.py")

stabilityai_path = os.path.join(resources_path, "stabilityai")
sdxl_path = os.path.join(stabilityai_path, "sdxl-v1")
sdxl_gpu_path = os.path.join(sdxl_path, gpu_sub_dir)
sdxl_gpu_script = os.path.join(sdxl_gpu_path, code_sub_dir, "sdxl_inference.py")

ROLE = "dummy/unused-role"
DEFAULT_TIMEOUT = 20
