Merge pull request #34 from nutanix/main
Merge main into release v1
johnugeorge authored Nov 2, 2023
2 parents b6e056c + 8ea2998 commit 897ba64
Showing 16 changed files with 957 additions and 332 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/lint.yaml
@@ -20,15 +20,15 @@ jobs:
         uses: actions/checkout@v3

       - name: Set up Python
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v4
         with:
           python-version: 3.11

       - name: Install Python dependencies
-        run: pip install black pylint torchserve==0.8.2 torch==2.0.1 transformers==4.33.0 -r llm/requirements.txt
+        run: pip install pytest black pylint torchserve==0.8.2 torch==2.0.1 transformers==4.33.0 -r llm/requirements.txt

-      - name: Run linters
-        uses: wearerequired/lint-action@v2
-        with:
-          black: true
-          pylint: true
+      - name: Run pylint
+        run: pylint ./llm
+
+      - name: Run black
+        run: black ./llm --check
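Replacing the third-party lint-action with explicit steps makes the checks easy to reproduce before pushing. A minimal local sketch, mirroring the exact commands the workflow now runs (assumes a checkout of the repository root):

```
# Install the linters the workflow uses.
pip install black pylint

# Static analysis over the llm package, as in the "Run pylint" step.
pylint ./llm

# Formatting check only (--check fails without modifying files),
# as in the "Run black" step.
black ./llm --check
```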
32 changes: 32 additions & 0 deletions .github/workflows/test.yaml
@@ -0,0 +1,32 @@
+name: Python Test with LLM
+
+on:
+  push:
+    branches:
+      - main # Change this to your main branch
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test:
+    name: Test with LLM
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.11
+
+      - name: Install dependencies
+        run: pip install pytest -r requirements.txt
+        working-directory: ./llm
+
+      - name: Run tests
+        run: python3 -m pytest tests -v
+        working-directory: ./llm
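The same suite can be run locally by mirroring the workflow steps (a sketch; assumes Python 3.11 and the repository checked out):

```
# Enter the package directory, matching the workflow's working-directory.
cd llm

# Install pytest along with the package requirements.
pip install pytest -r requirements.txt

# Verbose test run, identical to the CI invocation.
python3 -m pytest tests -v
```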

36 changes: 33 additions & 3 deletions README.md
@@ -113,7 +113,8 @@ bash $WORK_DIR/llm/run.sh -n llama2_7b -d data/summarize -g 1 -e llm-deploy -f '
 set HOST and PORT
 ```
 export INGRESS_HOST=$(kubectl get po -l istio=ingressgateway -n istio-system -o jsonpath='{.items[0].status.hostIP}')
-export INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].port}')
+export INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}')
 ```

 set Service Host Name
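The jsonpath fix above matters because `.port` is the service's cluster-internal port, while `.nodePort` is the port actually exposed on each node; since INGRESS_HOST is taken from the gateway pod's hostIP, traffic from outside the cluster must target the nodePort. To compare the two fields, one can print the matching port entry (assumes a standard Istio install):

```
# Dump the full http2 port entry of the ingress gateway service; the
# output shows both 'port' (in-cluster) and 'nodePort' (node-exposed).
kubectl -n istio-system get service istio-ingressgateway \
  -o jsonpath='{.spec.ports[?(@.name=="http2")]}'
```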
@@ -145,8 +146,37 @@ curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http:

 If keep alive flag was set in the bash script, then you can run the following command to stop the server and clean up temporary files

-python3 $WORK_DIR/llm/utils/cleanup.py --deploy_name <DEPLOYMENT_NAME>
+python3 $WORK_DIR/llm/cleanup.py --deploy_name <DEPLOYMENT_NAME>

 ```
-python3 $WORK_DIR/llm/utils/cleanup.py --deploy_name llm-deploy
+python3 $WORK_DIR/llm/cleanup.py --deploy_name llm-deploy
 ```

+## Custom Model Support
+
+We provide the capability to generate a MAR file with custom models and start an inference server using Kubeflow serving.
+
+### Generate MAR file for custom model
+To generate the MAR file, run the following (an example invocation with placeholder values follows the flag descriptions):
+```
+python3 download.py --no_download [--repo_version <REPO_COMMIT_ID> --handler <CUSTOM_HANDLER_PATH>] --model_name <MODEL_NAME> --model_path <MODEL_PATH> --output <NFS_LOCAL_MOUNT_LOCATION>
+```
+- no_download: Set this flag to skip downloading the model files; it must be set for custom models
+- model_name: Name of the custom model; this name must not be present in model_config
+- repo_version: Any model version, defaults to "1.0" (optional)
+- model_path: Absolute path of the custom model files (should be non-empty)
+- output: Mount path to your NFS server, used in the kube PV where config.properties and the model archive file are stored
+- handler: Path to a custom handler, defaults to llm/handler.py (optional)
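As an illustration, a hypothetical invocation for a custom model whose files live under /models/my-model, writing to an NFS mount at /mnt/nfs (every value below is a placeholder, not part of the repository):

```
# Generate a MAR file for a custom model without downloading weights;
# the default handler (llm/handler.py) and repo_version ("1.0") apply.
python3 download.py --no_download \
  --model_name my-model \
  --model_path /models/my-model \
  --output /mnt/nfs
```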

+### Start Torchserve and run inference for custom model
+Run the following command to start Kubeflow serving and run inference on the given input with a custom MAR file (a sketch with placeholder values follows the flag descriptions):
+```
+bash run.sh -n <CUSTOM_MODEL_NAME> -g <NUM_GPUS> -f <NFS_ADDRESS_WITH_SHARE_PATH> -m <NFS_LOCAL_MOUNT_LOCATION> -e <KUBE_DEPLOYMENT_NAME> [OPTIONAL -d <INPUT_PATH>]
+```
+- n: Name of the custom model; this name must not be present in model_config
+- d: Absolute path of the input data folder (optional)
+- g: Number of GPUs to be used for execution (set 0 to use CPU)
+- f: NFS server address with share path information
+- m: Mount path to your NFS server, used in the kube PV where model files and the model archive file are stored
+- e: Name of the deployment metadata
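For example, a sketch with placeholder values throughout (model name, NFS address, mount point, and deployment name are illustrative only):

```
# Deploy the custom model on one GPU and run inference on sample inputs;
# the -d flag is optional and can be dropped to skip inference.
bash run.sh -n my-model -g 1 \
  -f 10.0.0.5:/share \
  -m /mnt/nfs \
  -e my-deploy \
  -d /data/inputs
```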

2 changes: 1 addition & 1 deletion llm/cleanup.py
@@ -8,7 +8,7 @@
 from kserve import KServeClient


-def kubernetes(deploy_name):
+def kubernetes(deploy_name: str) -> None:
     """
     This function cleans up various Kubernetes resources,
     including deleting the deployment, persistent volume claims (PVCs), and
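The added hints are no-ops at runtime but let a static checker validate call sites, for instance (assuming mypy is installed; it is not part of this repository's tooling):

```
# Optional: statically check the newly annotated function and its callers.
pip install mypy
mypy llm/cleanup.py
```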
