From 14ace7a02e0f30c6336dfae4c33767c81655a006 Mon Sep 17 00:00:00 2001 From: Sean Rastatter Date: Wed, 20 Sep 2023 19:31:49 -0400 Subject: [PATCH] updated examples --- ...00_llmops_finetuning_flan_t5_example.ipynb | 32 +++-- .../finetuning-flan-t5/serving/app/main.py | 5 +- .../00_introduction_training_example.ipynb | 112 ++++++++---------- 3 files changed, 68 insertions(+), 81 deletions(-) diff --git a/examples/llmops/finetuning-flan-t5/00_llmops_finetuning_flan_t5_example.ipynb b/examples/llmops/finetuning-flan-t5/00_llmops_finetuning_flan_t5_example.ipynb index 642a259..b83f6eb 100644 --- a/examples/llmops/finetuning-flan-t5/00_llmops_finetuning_flan_t5_example.ipynb +++ b/examples/llmops/finetuning-flan-t5/00_llmops_finetuning_flan_t5_example.ipynb @@ -237,14 +237,7 @@ "metadata": {}, "outputs": [], "source": [ - "PROJECT_ID = '[your-project-id]' # @param {type:\"string\"}\n", - "\n", - "BUCKET_NAME = 'automlops-sandbox-bucket' # @param {type:\"string\"}\n", - "BUCKET_URI = f'gs://{BUCKET_NAME}'\n", - "PREFIX = 'flan_t5_model/'\n", - "MODEL_DIR = BUCKET_URI + '/' + PREFIX\n", - "AF_REGISTRY_NAME = 'vertex-mlops-af' # Artifact Registry name\n", - "REGION = 'us-central1'" + "PROJECT_ID = '[your-project-id]' # @param {type:\"string\"}" ] }, { @@ -305,6 +298,19 @@ "MODEL_ID = 'flan-t5-samsum'" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b20b161", + "metadata": {}, + "outputs": [], + "source": [ + "BUCKET_NAME = f'{PROJECT_ID}-{MODEL_ID}-bucket'\n", + "BUCKET_URI = f'gs://{BUCKET_NAME}'\n", + "AF_REGISTRY_NAME = 'vertex-mlops-af' # Artifact Registry name\n", + "REGION = 'us-central1'" + ] + }, { "cell_type": "markdown", "id": "8ba66e6f", @@ -339,7 +345,7 @@ "id": "c833c551", "metadata": {}, "source": [ - "## Build the Custom Serving image" + "## Build the Custom Serving image - Update [serving/app/main.py](serving/app/main.py) first!" ] }, { @@ -347,9 +353,9 @@ "id": "c7504586", "metadata": {}, "source": [ - "Create a custom serving image for running predictions using FastAPI. **Update [the server](serving/app/main.py) code with your bucket name and model_dir prefix from above.** Then build and push the custom serving image.\n", + "Create a custom serving image for running predictions using FastAPI. **Update [serving/app/main.py](serving/app/main.py) with your BUCKET_NAME from above.** Then build and push the custom serving image.\n", "\n", - "The Artifact Registry resource AF_REGISTRY_NAME must exist prior to submitting this build job." + "**NOTE: You must update serving/app/main.py and the Artifact Registry resource AF_REGISTRY_NAME must exist prior to submitting this build job.**" ] }, { @@ -881,7 +887,7 @@ "pipeline_params = {\n", " 'endpoint_sa': SERVICE_ACCOUNT,\n", " 'project_id': PROJECT_ID,\n", - " 'model_dir': MODEL_DIR,\n", + " 'model_dir': f'{BUCKET_URI}/model',\n", " 'lr': 5e-5,\n", " 'epochs': 5,\n", " 'logging_steps': 500,\n", @@ -940,7 +946,7 @@ " 'replica_count': 1,\n", " 'service_account': SERVICE_ACCOUNT,\n", " 'tensorboard': tensorboard.resource_name,\n", - " 'base_output_directory': f'{BUCKET_URI}/finetune_t5_model/'\n", + " 'base_output_directory': BUCKET_URI\n", " }]\n", ")" ] diff --git a/examples/llmops/finetuning-flan-t5/serving/app/main.py b/examples/llmops/finetuning-flan-t5/serving/app/main.py index c74af2b..ba0d13c 100644 --- a/examples/llmops/finetuning-flan-t5/serving/app/main.py +++ b/examples/llmops/finetuning-flan-t5/serving/app/main.py @@ -5,8 +5,7 @@ from fastapi import FastAPI, Request from transformers import AutoTokenizer, T5ForConditionalGeneration -BUCKET_NAME = 'automlops-sandbox-bucket' # Update -PREFIX = 'flan_t5_model/' # Update +BUCKET_NAME = 'PROJECT_ID-MODEL_ID-bucket' # Update with f'{actual_project_id}-{actual_model_id}-bucket' OUTPUT_FOLDER = '../model-output-flan-t5-base' app = FastAPI() @@ -17,7 +16,7 @@ def download_model_artifacts(): ''' storage_client = storage.Client() bucket = storage_client.get_bucket(BUCKET_NAME) - for blob in bucket.list_blobs(prefix=PREFIX): + for blob in bucket.list_blobs(prefix='model/'): if '.' in blob.name.split('/')[-1]: blob.download_to_filename(OUTPUT_FOLDER + '/' + blob.name.split('/')[-1]) diff --git a/examples/training/00_introduction_training_example.ipynb b/examples/training/00_introduction_training_example.ipynb index 72d7db4..00d1b68 100644 --- a/examples/training/00_introduction_training_example.ipynb +++ b/examples/training/00_introduction_training_example.ipynb @@ -267,13 +267,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Updated property [core/project].\r\n", - "\r\n", - "\r\n", - "Updates are available for some Google Cloud CLI components. To install them,\r\n", - "please run:\r\n", - " $ gcloud components update\r\n", - "\r\n" + "Updated property [core/project].\r\n" ] } ], @@ -697,49 +691,49 @@ "output_type": "stream", "text": [ "WARNING: Provisioning requires these permissions:\n", - "-storage.buckets.get\n", - "-iam.serviceAccounts.actAs\n", - "-storage.buckets.create\n", - "-pubsub.subscriptions.create\n", - "-pubsub.topics.list\n", - "-cloudfunctions.functions.create\n", - "-pubsub.subscriptions.list\n", "-cloudscheduler.jobs.create\n", - "-iam.serviceAccounts.list\n", - "-source.repos.create\n", + "-iam.serviceAccounts.create\n", + "-serviceusage.services.use\n", + "-serviceusage.services.enable\n", + "-artifactregistry.repositories.create\n", + "-source.repos.list\n", "-artifactregistry.repositories.list\n", + "-cloudbuild.builds.list\n", + "-iam.serviceAccounts.list\n", + "-pubsub.topics.list\n", "-cloudbuild.builds.create\n", - "-iam.serviceAccounts.create\n", + "-storage.buckets.create\n", + "-pubsub.topics.create\n", + "-cloudfunctions.functions.create\n", "-cloudfunctions.functions.get\n", + "-pubsub.subscriptions.create\n", + "-iam.serviceAccounts.actAs\n", + "-storage.buckets.get\n", "-resourcemanager.projects.setIamPolicy\n", "-cloudscheduler.jobs.list\n", - "-pubsub.topics.create\n", - "-serviceusage.services.use\n", - "-source.repos.list\n", - "-serviceusage.services.enable\n", - "-cloudbuild.builds.list\n", - "-artifactregistry.repositories.create\n", + "-source.repos.create\n", + "-pubsub.subscriptions.list\n", "\n", "You are currently using: srastatter@google.com. Please check your account permissions.\n", "The following are the recommended roles for provisioning:\n", + "-roles/artifactregistry.admin\n", + "-roles/serviceusage.serviceUsageAdmin\n", + "-roles/cloudbuild.builds.editor\n", "-roles/source.admin\n", + "-roles/iam.serviceAccountUser\n", "-roles/cloudscheduler.admin\n", - "-roles/cloudbuild.builds.editor\n", - "-roles/artifactregistry.admin\n", "-roles/iam.serviceAccountAdmin\n", - "-roles/iam.serviceAccountUser\n", + "-roles/storage.admin\n", "-roles/pubsub.editor\n", "-roles/resourcemanager.projectIamAdmin\n", - "-roles/serviceusage.serviceUsageAdmin\n", - "-roles/storage.admin\n", "-roles/cloudfunctions.admin\n", "\n", "\u001b[0;32m Setting up API services in project automlops-sandbox \u001b[0m\n", - "Operation \"operations/acat.p2-45373616427-ce934737-1ded-43fd-aeca-3b54e51e1987\" finished successfully.\n", + "Operation \"operations/acat.p2-45373616427-2910359e-ca81-4af7-997d-7452e76b8677\" finished successfully.\n", "\u001b[0;32m Setting up Artifact Registry in project automlops-sandbox \u001b[0m\n", "Listing items under project automlops-sandbox, location us-central1.\n", "\n", - "dry-beans-dt-artifact-registry DOCKER STANDARD_REPOSITORY Artifact Registry dry-beans-dt-artifact-registry in us-central1. us-central1 Google-managed key 2023-09-05T11:25:48 2023-09-19T23:35:25 8366.481\n", + "dry-beans-dt-artifact-registry DOCKER STANDARD_REPOSITORY Artifact Registry dry-beans-dt-artifact-registry in us-central1. us-central1 Google-managed key 2023-09-05T11:25:48 2023-09-20T13:10:52 9080.502\n", "Artifact Registry: dry-beans-dt-artifact-registry already exists in project automlops-sandbox\n", "\u001b[0;32m Setting up Storage Bucket in project automlops-sandbox \u001b[0m\n", "gs://automlops-sandbox-dry-beans-dt-bucket/\n", @@ -757,11 +751,11 @@ "\u001b[0;32m Deploying Cloud Functions: dry-beans-dt-job-submission-svc in project automlops-sandbox \u001b[0m\n", "Deploying function (may take a while - up to 2 minutes)...\n", "..\n", - "For Cloud Build Logs, visit: https://console.cloud.google.com/cloud-build/builds;region=us-central1/d848c666-5293-4876-a9f7-51b2778d30ac?project=45373616427\n", - "................................................................done.\n", + "For Cloud Build Logs, visit: https://console.cloud.google.com/cloud-build/builds;region=us-central1/02d7faae-e8b0-409b-84e2-cb2ad5c86164?project=45373616427\n", + "...........................................................done.\n", "availableMemoryMb: 512\n", - "buildId: d848c666-5293-4876-a9f7-51b2778d30ac\n", - "buildName: projects/45373616427/locations/us-central1/builds/d848c666-5293-4876-a9f7-51b2778d30ac\n", + "buildId: 02d7faae-e8b0-409b-84e2-cb2ad5c86164\n", + "buildName: projects/45373616427/locations/us-central1/builds/02d7faae-e8b0-409b-84e2-cb2ad5c86164\n", "dockerRegistry: ARTIFACT_REGISTRY\n", "entryPoint: process_request\n", "eventTrigger:\n", @@ -776,29 +770,17 @@ "name: projects/automlops-sandbox/locations/us-central1/functions/dry-beans-dt-job-submission-svc\n", "runtime: python39\n", "serviceAccountEmail: vertex-pipelines@automlops-sandbox.iam.gserviceaccount.com\n", - "sourceUploadUrl: https://storage.googleapis.com/uploads-961973632599.us-central1.cloudfunctions.appspot.com/47aab611-730e-41ef-b16a-0fbe357536b8.zip\n", + "sourceUploadUrl: https://storage.googleapis.com/uploads-961973632599.us-central1.cloudfunctions.appspot.com/fce36a97-cf63-49c5-94ab-144dcf125cdd.zip\n", "status: ACTIVE\n", "timeout: 540s\n", - "updateTime: '2023-09-20T17:05:50.094Z'\n", - "versionId: '2'\n", + "updateTime: '2023-09-20T18:01:23.259Z'\n", + "versionId: '3'\n", "\u001b[0;32m Setting up Cloud Build Trigger in project automlops-sandbox \u001b[0m\n", "name: dry-beans-dt-build-trigger\n", "Cloudbuild Trigger already exists in project automlops-sandbox for repo dry-beans-dt-repository\n", "\u001b[0;32m Setting up Cloud Scheduler Job in project automlops-sandbox \u001b[0m\n", - "Creating Cloud Scheduler Job: dry-beans-dt-schedule in project automlops-sandbox\n", - "name: projects/automlops-sandbox/locations/us-central1/jobs/dry-beans-dt-schedule\n", - "pubsubTarget:\n", - " data: ewogICAgImJxX3RhYmxlIjogImF1dG9tbG9wcy1zYW5kYm94LnRlc3RfZGF0YXNldC5kcnktYmVhbnMiLAogICAgIm1vZGVsX2RpcmVjdG9yeSI6ICJnczovL2F1dG9tbG9wcy1zYW5kYm94LWRyeS1iZWFucy1kdC1idWNrZXQvdHJhaW5lZF9tb2RlbHMvMjAyMy0wOS0yMCAxMzowMzoyNy4xNjM5NjEiLAogICAgImRhdGFfcGF0aCI6ICJnczovL2F1dG9tbG9wcy1zYW5kYm94LWRyeS1iZWFucy1kdC1idWNrZXQvZGF0YS5jc3YiLAogICAgInByb2plY3RfaWQiOiAiYXV0b21sb3BzLXNhbmRib3giLAogICAgInJlZ2lvbiI6ICJ1cy1jZW50cmFsMSIsCiAgICAiZ3NfcGlwZWxpbmVfc3BlY19wYXRoIjogImdzOi8vYXV0b21sb3BzLXNhbmRib3gtZHJ5LWJlYW5zLWR0LWJ1Y2tldC9waXBlbGluZV9yb290L2RyeS1iZWFucy1kdC9waXBlbGluZV9qb2IuanNvbiIKfQ==\n", - " topicName: projects/automlops-sandbox/topics/dry-beans-dt-queueing-svc\n", - "retryConfig:\n", - " maxBackoffDuration: 3600s\n", - " maxDoublings: 16\n", - " maxRetryDuration: 0s\n", - " minBackoffDuration: 5s\n", - "schedule: 59 11 * * 0\n", - "state: ENABLED\n", - "timeZone: Etc/UTC\n", - "userUpdateTime: '2023-09-20T17:05:53Z'\n" + "dry-beans-dt-schedule us-central1 59 11 * * 0 (Etc/UTC) Pub/Sub ENABLED\n", + "Cloud Scheduler Job: dry-beans-dt-schedule already exists in project automlops-sandbox\n" ] } ], @@ -827,28 +809,28 @@ "output_type": "stream", "text": [ "WARNING: Running precheck for deploying requires these permissions:\n", + "-resourcemanager.projects.getIamPolicy\n", + "-pubsub.subscriptions.get\n", "-cloudbuild.builds.get\n", + "-storage.buckets.update\n", "-iam.serviceAccounts.get\n", + "-artifactregistry.repositories.get\n", + "-cloudfunctions.functions.get\n", "-pubsub.topics.get\n", - "-pubsub.subscriptions.get\n", - "-resourcemanager.projects.getIamPolicy\n", "-source.repos.update\n", "-serviceusage.services.get\n", - "-artifactregistry.repositories.get\n", - "-cloudfunctions.functions.get\n", - "-storage.buckets.update\n", "\n", "You are currently using: srastatter@google.com. Please check your account permissions.\n", "The following are the recommended roles for deploying with precheck:\n", - "-roles/source.writer\n", - "-roles/cloudfunctions.viewer\n", - "-roles/iam.roleViewer\n", + "-roles/pubsub.viewer\n", + "-roles/serviceusage.serviceUsageViewer\n", "-roles/cloudbuild.builds.editor\n", + "-roles/source.writer\n", "-roles/iam.serviceAccountUser\n", - "-roles/serviceusage.serviceUsageViewer\n", - "-roles/pubsub.viewer\n", - "-roles/artifactregistry.reader\n", "-roles/storage.admin\n", + "-roles/iam.roleViewer\n", + "-roles/cloudfunctions.viewer\n", + "-roles/artifactregistry.reader\n", "\n", "Checking for required API services in project automlops-sandbox...\n", "Checking for Artifact Registry in project automlops-sandbox...\n", @@ -864,7 +846,7 @@ "\n", "Initialized empty Git repository in /Users/srastatter/Documents/2023/MLOps-graduation/AutoMLOps-github/examples/training/.git/\n", "Switched to a new branch 'automlops'\n", - "[automlops (root-commit) 81a1cd1] Run AutoMLOps\n", + "[automlops (root-commit) 4c3cee5] Run AutoMLOps\n", " 25 files changed, 1237 insertions(+)\n", " create mode 100644 AutoMLOps/README.md\n", " create mode 100644 AutoMLOps/cloudbuild.yaml\n", @@ -891,9 +873,9 @@ " create mode 100644 AutoMLOps/services/submission_service/Dockerfile\n", " create mode 100644 AutoMLOps/services/submission_service/main.py\n", " create mode 100644 AutoMLOps/services/submission_service/requirements.txt\n", - "remote: Waiting for private key checker: 19/25 objects left \n", + "remote: Waiting for private key checker: 12/25 objects left \n", "To https://source.developers.google.com/p/automlops-sandbox/r/dry-beans-dt-repository\n", - " + 4717cf3...81a1cd1 automlops -> automlops (forced update)\n", + " + 81a1cd1...4c3cee5 automlops -> automlops (forced update)\n", "Pushing code to automlops branch, triggering build...\n", "Cloud Build job running at: https://console.cloud.google.com/cloud-build/builds;region=us-central1\n", "Please wait for this build job to complete.\n",