
Commit

Some more docstrings and code-comments (more to come)
alvarobartt committed Mar 6, 2024
1 parent db0b2cc · commit 0a10dc4
Showing 4 changed files with 114 additions and 3 deletions.
57 changes: 54 additions & 3 deletions src/vertex_ai_huggingface_inference_toolkit/docker.py
@@ -22,9 +22,35 @@ def build_docker_image(
cuda_version: Optional[str] = None,
extra_requirements: Optional[List[str]] = None,
) -> str:
_cuda_string = f"cu{cuda_version}" if cuda_version is not None else "cpu"
_tag = f"py{python_version}-{_cuda_string}-{framework}-{framework_version}-transformers-{transformers_version}"

"""Builds the Docker image locally using `docker`, building it via `--build-args`
on top of either `Dockerfile.cpu` or `Dockerfile.gpu` provided within the current
package, since those images are already suited for inference in Vertex AI.
Args:
python_version: is the Python version to be installed via `apt-get` install, so
it needs to be provided as a string, i.e. `3.9`, `3.10`, `3.11`, etc.
framework: is the identifier of the deep learning framework to use. Available
options for the moment are `torch`, `tensorflow` and `jax`.
framework_version: is the version of the provided framework as shown in PyPI.
transformers_version: is the version of `transformers` to install, since the
inference code will be run via `transformers`.
cuda_version: is the version of CUDA to use, if planning to deploy the model
within an instance with GPU acceleration. The CUDA versions to be provided
need to be in the format of X.Y.Z, and available at https://hub.docker.com/r/nvidia/cuda/tags?page=1&name=runtime-ubuntu
i.e. "12.3.0", "12.3.2", etc.
extra_requirements: is an optional list of requirements to install within the
image, following the `pip install` formatting i.e. `sentence-transformers >= 2.5.0`.
Returns:
The Docker image name, including the tag, that has been built locally.
"""

# The tag is generated in advance from the values that will replace the `--build-args`
_device_string = f"cu{cuda_version}" if cuda_version is not None else "cpu"
_tag = f"py{python_version}-{_device_string}-{framework}-{framework_version}-transformers-{transformers_version}"

# The `_build_args` to be substituted into the `Dockerfile` when building it need to be
# prepared in advance, to ensure the formatting and the assignment are correct.
_dockerfile = "Dockerfile.cpu"
_build_args = {
"PYTHON_VERSION": python_version,
@@ -46,15 +72,20 @@ def build_docker_image(
cache_path = CACHE_PATH / _tag / datetime.now().strftime("%Y-%m-%d--%H:%M")
cache_path.mkdir(parents=True, exist_ok=True)

# (Optional) On top of the pre-defined `Dockerfile`, the replacements for the
# `--build-args` are applied using the `_build_args` dictionary
dockerfile_content = open(f"{_path}/{_dockerfile}", mode="r").read()
for arg, value in _build_args.items():
pattern = re.compile(rf"\$\{{\s*{arg}\s*}}")
dockerfile_content = re.sub(pattern, value, dockerfile_content)

# (Optional) The generated `Dockerfile` is stored within the cache for reproducibility
dockerfile_path = cache_path / "Dockerfile"
with dockerfile_path.open(mode="w") as dockerfile:
dockerfile.write(dockerfile_content)

# The `Dockerfile` is built using `platform=linux/amd64` as it will be deployed in
# an instance running Linux
client = docker.from_env() # type: ignore
image, _ = client.images.build( # type: ignore
path=_path,
@@ -74,14 +105,34 @@ def configure_docker_and_push_image(
repository: str,
image_with_tag: str,
) -> str:
"""Configures Docker to use the Google Cloud Artifact Registry and pushes the image
that has been built locally in advance and so on, available when listing the images
with the command `docker images`.
Args:
project_id: is either the name or the identifier of the project in Google Cloud.
location: is the identifier of the region and zone where the image will be pushed to.
repository: is the name of the Docker repository in Google Artifact Registry.
image_with_tag: is the Docker image built locally, including the tag.
Returns:
The repository path to the Docker image that has been pushed to Google Artifact Registry.
"""

# If no tag has been provided, then assume the tag to use is `latest`
if len(image_with_tag.split(":")) != 2:
image_with_tag += ":latest"

# Configures Docker to authenticate against the Docker repository before pushing the image
repository_url = f"{location}-docker.pkg.dev"
# NOTE: running a `gcloud` command via `subprocess` from Python is not ideal, but the
# Google Cloud Python SDKs don't offer an equivalent of `gcloud auth configure-docker`
subprocess.run(["gcloud", "auth", "configure-docker", repository_url, "--quiet"])

repository_path = f"{repository_url}/{project_id}/{repository}/{image_with_tag}"

# Tags the Docker image with the destination path in Google Artifact Registry and
# pushes the image using that tag
client = docker.from_env() # type: ignore
client.images.get(image_with_tag).tag(repository_path) # type: ignore
client.images.push(repository_path)
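A minimal usage sketch for the two helpers in this file, `build_docker_image` and `configure_docker_and_push_image`. The argument values below are placeholders and the keyword-argument call style is an assumption based on the signatures and docstrings shown above, not a verified snippet from the package:

from vertex_ai_huggingface_inference_toolkit.docker import (
    build_docker_image,
    configure_docker_and_push_image,
)

# Build the CUDA-enabled image locally; the resulting tag is derived from these values,
# e.g. "py3.10-cu12.3.0-torch-2.2.0-transformers-4.38.2"
image_with_tag = build_docker_image(
    python_version="3.10",
    framework="torch",
    framework_version="2.2.0",
    transformers_version="4.38.2",
    cuda_version="12.3.0",
    extra_requirements=["sentence-transformers >= 2.5.0"],
)

# Push the freshly built image to Google Artifact Registry; the returned path follows
# "{location}-docker.pkg.dev/{project_id}/{repository}/{image_with_tag}"
repository_path = configure_docker_and_push_image(
    project_id="my-gcp-project",  # placeholder project identifier
    location="europe-west4",  # placeholder location
    repository="vertex-ai-images",  # placeholder Docker repository name
    image_with_tag=image_with_tag,
)
print(repository_path)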
@@ -11,11 +11,31 @@ def upload_file_to_gcs(
remote_path: str,
bucket_name: Optional[str] = None,
) -> str:
"""Uploads a file from local storage to Google Cloud Storage.
Args:
project_id: is either the name or the identifier of the project in Google Cloud.
location: is the identifier of the region and zone where the file will be uploaded to.
local_path: is the path to the file in the local storage.
remote_path: is the destination path in Google Cloud Storage where the file will be
uploaded to.
bucket_name: is the name of the bucket in Google Cloud Storage where the file will
be uploaded to.
Returns:
The path in Google Cloud Storage to the uploaded file.
"""

client = Client(project=project_id)

# By default we will use the `vertex-ai-huggingface-inference-toolkit` bucket
# if no bucket has been provided.
if bucket_name is None:
bucket_name = "vertex-ai-huggingface-inference-toolkit"

# If the bucket doesn't exist, we create it, enabling `uniform_bucket_level_access_enabled`
# so that we don't run into permission issues when downloading files from the bucket
# from the container running in Vertex AI.
bucket = client.bucket(bucket_name)
if not bucket.exists():
warnings.warn(
@@ -26,6 +46,7 @@ def upload_file_to_gcs(
bucket.iam_configuration.uniform_bucket_level_access_enabled = True
bucket.patch()

# Finally, the blob is created and the file is uploaded to that blob
blob = bucket.blob(remote_path)
blob.upload_from_filename(local_path)
return f"gs://{bucket_name}/{remote_path}" # type: ignore
@@ -8,10 +8,32 @@


class CustomPredictionHandler(PredictionHandler):
"""Custom class that overrides the default `PredictionHandler` provided within
`google-cloud-aiplatform` to be able to handle the cases where the `artifacts_uri`
is None which could be translated into being able to run the server locally without
the need of an artifact URI as the model is pulled from the Hugging Face Hub.
"""

def __init__(
self,
artifacts_uri: Optional[str] = None,
predictor: Optional[Type[TransformersPredictor]] = None,
) -> None:
"""Initializes the `TransformersPredictor` provided via the `predictor`
arg, since the default `PredictionHandler` won't allow an empty `artifacts_uri`.
Note:
The `predictor` is mandatory, but it's been set as optional since the
`CprModelServer` intializes the `PredictionHandler` without using `kwargs`
so that the first arg is set to be `artifacts_uri` as it's provided without
keyword.
Args:
artifacts_uri: is the Google Cloud Storage URI to the artifact to serve, which
will ideally be the directory where the model is stored in Google Cloud Storage.
predictor: is the `TransformersPredictor` class subclassing `PredictionHandler`,
that implements the logic for the inference on top of the model.
"""

self._predictor = predictor() # type: ignore
self._predictor.load(artifacts_uri)
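A short sketch of how the handler above can be instantiated directly, illustrating why `predictor` is declared optional yet effectively required. The `TransformersPredictor` import path is a guess, and the environment variables follow the example given for the server later in this commit:

import os

from vertex_ai_huggingface_inference_toolkit.handler.custom_handler import CustomPredictionHandler
# NOTE: hypothetical module path for the predictor class
from vertex_ai_huggingface_inference_toolkit.predictors.transformers import TransformersPredictor

# Presumably read by the predictor to know which model to pull from the Hugging Face Hub
os.environ["HF_HUB_ID"] = "cardiffnlp/twitter-roberta-base-sentiment-latest"
os.environ["HF_TASK"] = "text-classification"

# `artifacts_uri=None` is allowed here: the model is pulled from the Hugging Face Hub
# instead of Google Cloud Storage, which is exactly what this custom handler enables
handler = CustomPredictionHandler(None, predictor=TransformersPredictor)

# Omitting `predictor` would fail, since `predictor()` is called unconditionally in
# `__init__`: CustomPredictionHandler(None) raises a TypeError ('NoneType' is not callable)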
@@ -4,7 +4,18 @@


class CustomCprModelServer(CprModelServer):
"""Custom class that overrides the default `CprModelServer` provided within
`google-cloud-aiplatform` to be able to run the inference server locally before
going to Vertex AI, in order to better test and debug the potential issues within
the `TransformersPredictor` used.
"""

def __init__(self) -> None:
"""Sets the environment variables required by the `CprModelServer` so as to
be able to run the server with minimal to no configuration, since only the `HF_HUB_ID`
and, optionally, the `HF_TASK` need to be provided.
"""

os.environ["HANDLER_MODULE"] = (
"vertex_ai_huggingface_inference_toolkit.handler.custom_handler"
)
@@ -23,6 +34,12 @@ def __init__(self) -> None:


if __name__ == "__main__":
"""
Example:
>>> export HF_HUB_ID="cardiffnlp/twitter-roberta-base-sentiment-latest"
>>> export HF_TASK="text-classification"
>>> python vertex_ai_huggingface_inference_toolkit.server.custom_serving.py
"""
import uvicorn

uvicorn.run(
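Once the server is running locally as per the example above (the `uvicorn.run(...)` call is truncated in this diff), a client-side sketch could look as follows. The port, the `/predict` route, and the `instances` payload format are assumptions based on the defaults that the `CprModelServer` reads from the `AIP_*` environment variables and the standard Vertex AI prediction schema:

import requests

response = requests.post(
    "http://localhost:8080/predict",  # assumed AIP_HTTP_PORT / AIP_PREDICT_ROUTE defaults
    json={"instances": ["I love this library!"]},
    timeout=30,
)
response.raise_for_status()
print(response.json())  # the predictions returned by the `TransformersPredictor`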
