Skip to content

Commit

Permalink
Simplify deployment of foundation model
Browse files (browse the repository at this point in the history)
  • Loading branch information
pm3310 committed Jan 19, 2024
1 parent 391ba14 commit b9ae95b
Showing 1 changed file with 6 additions and 33 deletions.
39 changes: 6 additions & 33 deletions sagify/sagemaker/sagemaker.py
Original file line number Diff line number Diff line change
@@ -8,8 +8,8 @@
import sagemaker.huggingface
import sagemaker.xgboost
import sagemaker.sklearn.model
from sagemaker import image_uris, model_uris, payloads
from sagemaker.predictor import Predictor
from sagemaker import payloads
from sagemaker.jumpstart.model import JumpStartModel
from six.moves.urllib.parse import urlparse

import boto3
@@ -604,44 +604,17 @@ def deploy_foundation_model(
:return: [str], endpoint name
"""
deploy_image_uri = image_uris.retrieve(
region=self.aws_region,
framework=None, # automatically inferred from model_id
image_scope="inference",
model_id=model_id,
model_version=model_version,
instance_type=instance_type,
sagemaker_session=self.sagemaker_session
)

model_uri = model_uris.retrieve(
model = JumpStartModel(
model_id=model_id,
model_version=model_version,
model_scope="inference",
region=self.aws_region,
sagemaker_session=self.sagemaker_session
)

# Increase the maximum response size from the endpoint
env = {
"MMS_MAX_RESPONSE_SIZE": "20000000",
}

model = sage.Model(
image_uri=deploy_image_uri,
model_data=model_uri,
role=self.role,
predictor_cls=Predictor,
name=endpoint_name,
env=env,
sagemaker_session=self.sagemaker_session
sagemaker_session=self.sagemaker_session,
tolerate_deprecated_model=True,
tolerate_vulnerable_model=True
)

model_predictor = model.deploy(
initial_instance_count=instance_count,
instance_type=instance_type,
predictor_cls=Predictor,
endpoint_name=endpoint_name,
tags=tags,
accept_eula=True
)
Expand Down

0 comments on commit b9ae95b

Please sign in to comment.