nonokangwei · THULiusj · Mar 24, 2023 · Mar 24, 2023 · Mar 24, 2023 · Mar 24, 2023
diff --git a/.DS_Store b/.DS_Store
diff --git a/Archive/.gcloudignore b/Archive/.gcloudignore
diff --git a/Archive/README.md b/Archive/README.md
diff --git a/Archive/cloudbuild.yaml b/Archive/cloudbuild.yaml
diff --git a/Archive/clouddeploy.yaml b/Archive/clouddeploy.yaml
diff --git a/Archive/dcgm_loadtest.yml b/Archive/dcgm_loadtest.yml
diff --git a/Archive/dcgm_loadtest_deployment.yaml b/Archive/dcgm_loadtest_deployment.yaml
diff --git a/Archive/deployment.yaml b/Archive/deployment.yaml
diff --git a/Archive/kustomization.yaml b/Archive/kustomization.yaml
diff --git a/Archive/pvc.yaml b/Archive/pvc.yaml
diff --git a/Archive/skaffold.yaml b/Archive/skaffold.yaml
diff --git a/PEFTonVertex/.DS_Store b/PEFTonVertex/.DS_Store
diff --git a/PEFTonVertex/CustomTraining/Dockerfile b/PEFTonVertex/CustomTraining/Dockerfile
@@ -0,0 +1,54 @@
+# Training image: installs PyTorch, PEFT, the peft-lora-sd-dreambooth
+# requirements and the Google Cloud clients, then runs train.py.
+FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
+
+# Refresh the package index and install in one layer so a cached index is
+# never reused with a changed package list; the index is removed afterwards
+# to keep the layer small.
+# NOTE(review): recommended packages are kept on purpose - pip may need the
+# build tools they pull in to compile requirements; confirm before adding
+# --no-install-recommends.
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        git \
+        python3 \
+        python3-pip \
+        python3-venv \
+        wget \
+    && rm -rf /var/lib/apt/lists/*
+
+# PyTorch/torchvision wheels for CUDA 11.7 from the PyTorch wheel index.
+RUN pip install --no-cache-dir \
+        torch==1.13.1+cu117 \
+        torchvision==0.14.1+cu117 \
+        --extra-index-url https://download.pytorch.org/whl/cu117
+
+WORKDIR /root
+
+# Install PEFT from a fresh clone of its repository.
+RUN git clone https://github.com/huggingface/peft.git \
+    && pip install --no-cache-dir /root/peft
+
+# Clone the DreamBooth demo space and install its Python requirements.
+RUN git clone https://huggingface.co/spaces/smangrul/peft-lora-sd-dreambooth \
+    && pip install --no-cache-dir -r /root/peft-lora-sd-dreambooth/requirements.txt
+
+# Append the CUDA library directory to the dynamic loader search path.
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
+
+# Expose the CUDA runtime under the unversioned name libcudart.so
+# (presumably what bitsandbytes looks for - confirm). -f keeps the step from
+# failing when the link already exists.
+RUN ln -sf /usr/local/cuda/lib64/libcudart.so.11.0 /usr/local/cuda/lib64/libcudart.so \
+    && pip install --no-cache-dir -U bitsandbytes --prefer-binary
+
+# Installs additional packages as you need.
+RUN pip install --no-cache-dir -U \
+        google-cloud-aiplatform \
+        google-cloud-storage
+
+# Copies the trainer code to the docker image.
+COPY train.py /root/train.py
+
+# Sets up the entry point to invoke the trainer.
+ENTRYPOINT ["python3", "-m", "train"]
diff --git a/PEFTonVertex/CustomTraining/cloud_build_config.yaml b/PEFTonVertex/CustomTraining/cloud_build_config.yaml
@@ -0,0 +1,18 @@
+# Cloud Build pipeline: build the trainer image from the submitted directory,
+# then push it to the registry named in the tag.
+steps:
+  # Step 1: build and tag the image.
+  - name: 'gcr.io/cloud-builders/docker'
+    args:
+      - 'build'
+      - '-t'
+      - 'us-central1-docker.pkg.dev/argolis-lsj-test/sd-lsj/sd-peft:v1'
+      - '.'
+  # Step 2: push the tagged image.
+  - name: 'gcr.io/cloud-builders/docker'
+    args:
+      - 'push'
+      - 'us-central1-docker.pkg.dev/argolis-lsj-test/sd-lsj/sd-peft:v1'
+options:
+  machineType: 'N1_HIGHCPU_8'
+  diskSizeGb: '200'
diff --git a/PEFTonVertex/CustomTraining/cloud_cli.sh b/PEFTonVertex/CustomTraining/cloud_cli.sh
@@ -0,0 +1,14 @@
+# cloud build image
+gcloud builds submit --config cloud-build-config.yaml .
+
+# create vertex ai customer training job
+# args format:
+# --model_name: Huggingface repo id, or "/gcs/bucket_name/model_folder". I only test the models downloaded from HF, with standard diffusers format. Safetensors has not been test.
+# --input_storage: bucket_name/input_image_folder
+# --output_storage: bucket_name/output_folder
+# --prompt: a photo of XXX
+gcloud ai custom-jobs create \
+  --region=us-central1 \
+  --display-name=sd-lora-training-peft-1 \
+  --config=vertex-ai-config.yaml \
+  --args="--model_name=runwayml/stable-diffusion-v1-5,--input_storage=/gcs/sd_lsj/input_dog,--output_storage=/gcs/sd_lsj/peft/dog_lora_output,--prompt=a photo of sks dog,--class_prompt=a photo of dog"