Merge pull request #45 from nutanix/main
merge main into release v2
johnugeorge authored Dec 13, 2023
2 parents f0ab70f + c62f659 commit dc83ae6
Showing 4 changed files with 37 additions and 31 deletions.
2 changes: 1 addition & 1 deletion README.md

```diff
@@ -12,7 +12,7 @@ This new solution includes:
 - The management interface for enhanced terminal UI or standard CLI
 - Support for a curated set of LLMs including Llama2, Falcon and MPT
 
-Refer to the official [GPT-in-a-Box Documentation](https://opendocs.nutanix.com/gpt-in-a-box/kubernetes/getting_started/) to deploy and validate the inference server on Kubernetes cluster
+Refer to the official [GPT-in-a-Box Documentation](https://opendocs.nutanix.com/gpt-in-a-box/overview/) to deploy and validate the inference server on Kubernetes cluster
 
 ### License
 All source code and other contents in this repository are covered by the Nutanix License and Services Agreement, which is located at https://www.nutanix.com/legal/eula
```
14 changes: 5 additions & 9 deletions llm/generate.py

```diff
@@ -242,19 +242,15 @@ class with relevant information.
         )
     else:
         print(
-            "## If you want to create a model archive file with the supported models, "
+            "## If you want to create a model archive file for supported models, "
            "make sure you're model name is present in the below : "
         )
         print(list(models.keys()))
         print(
-            "If you want to create a model archive file for a custom model, there "
-            "are two methods:\n"
-            "1. If you have already downloaded the custom model files, please include"
-            " the --skip_download flag and provide the model_path directory which contains "
-            "the model files.\n"
-            "2. If you need to download the model files, provide the HuggingFace "
-            "repository ID along with a model_path driectory where the model "
-            "files are to be downloaded."
+            "\nIf you want to create a model archive file for"
+            " either a Custom Model or other HuggingFace models, "
+            "refer to the official GPT-in-a-Box documentation: "
+            "https://opendocs.nutanix.com/gpt-in-a-box/overview/"
         )
         sys.exit(1)
 
```
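The rewritten else-branch now points custom-model users at the documentation instead of describing the two download methods inline. A minimal, self-contained sketch of that error path follows; the `models` dict, the hard-coded config path, and the example model name are assumptions inferred from this diff, not the full source:

```python
import json
import sys

# Assumed: the registry of supported models lives in llm/model_config.json,
# keyed by model name (see the model_config.json diff below).
with open("llm/model_config.json", encoding="utf-8") as f:
    models = json.load(f)

model_name = "my_custom_model"  # hypothetical user input
if model_name not in models:
    print(
        "## If you want to create a model archive file for supported models, "
        "make sure your model name is present in the below : "
    )
    print(list(models.keys()))
    print(
        "\nIf you want to create a model archive file for"
        " either a Custom Model or other HuggingFace models, "
        "refer to the official GPT-in-a-Box documentation: "
        "https://opendocs.nutanix.com/gpt-in-a-box/overview/"
    )
    sys.exit(1)
```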
48 changes: 29 additions & 19 deletions llm/model_config.json

```diff
@@ -1,7 +1,7 @@
 {
     "mpt_7b": {
         "repo_id": "mosaicml/mpt-7b",
-        "repo_version": "00f72b21dd089db80c7fb50cb606996751500f6d",
+        "repo_version": "ada218f9a93b5f1c6dce48a4cc9ff01fcba431e7",
         "handler": "handler.py",
         "model_params":{
             "temperature" : 0.7,
@@ -13,8 +13,7 @@
             "batch_size" : 1,
             "max_batch_delay" : 200,
             "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        }
     },
     "falcon_7b": {
         "repo_id": "tiiuae/falcon-7b",
@@ -30,8 +29,7 @@
             "batch_size" : 1,
             "max_batch_delay" : 200,
             "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        }
     },
     "llama2_7b": {
         "repo_id": "meta-llama/Llama-2-7b-hf",
@@ -47,24 +45,36 @@
             "batch_size" : 1,
             "max_batch_delay" : 200,
             "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        }
     },
-    "phi-1_5": {
-        "repo_id": "microsoft/phi-1_5",
-        "repo_version": "b6a7e2fe15c21f5847279f23e280cc5a0e7049ef",
-        "handler": "handler.py",
-        "registration_params":{
-            "batch_size" : 1,
-            "max_batch_delay" : 200,
-            "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
-    },
     "gpt2": {
         "repo_id": "gpt2",
         "repo_version": "11c5a3d5811f50298f278a704980280950aedb10",
         "handler": "handler.py",
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        "registration_params": {
+            "batch_size": 1,
+            "max_batch_delay": 200,
+            "response_timeout": 2000
+        }
+    },
+    "codellama_7b_python": {
+        "repo_id": "codellama/CodeLlama-7b-Python-hf",
+        "repo_version": "7ee7b6beb0dece09b0431ea46c03bc1724e21572",
+        "handler": "handler.py",
+        "registration_params": {
+            "batch_size": 1,
+            "max_batch_delay": 200,
+            "response_timeout": 2000
+        }
+    },
+    "llama2_7b_chat": {
+        "repo_id": "meta-llama/Llama-2-7b-chat-hf",
+        "repo_version": "94b07a6e30c3292b8265ed32ffdeccfdadf434a8",
+        "handler": "handler.py",
+        "registration_params": {
+            "batch_size": 1,
+            "max_batch_delay": 200,
+            "response_timeout": 2000
+        }
     }
 }
```
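Each entry pins `repo_version` to a full Hugging Face commit hash, keeping downloads reproducible as upstream repositories change; this commit bumps the `mpt_7b` pin, drops `phi-1_5`, removes the per-model `gpu_type` lists, gives every remaining model explicit `registration_params`, and adds `codellama_7b_python` and `llama2_7b_chat`. A hedged sketch of how such a pin can be consumed, using `huggingface_hub.snapshot_download` (a real API that accepts a `revision` argument; the loading code around it is illustrative, not the repository's own):

```python
import json

from huggingface_hub import snapshot_download

with open("llm/model_config.json", encoding="utf-8") as f:
    models = json.load(f)

entry = models["gpt2"]  # ungated model, downloadable without a token
# revision= pins the download to the exact commit recorded in repo_version,
# so later upstream commits cannot change what gets fetched.
local_dir = snapshot_download(
    repo_id=entry["repo_id"],        # "gpt2"
    revision=entry["repo_version"],  # "11c5a3d5811f50298f278a704980280950aedb10"
)
print(f"Model files are in {local_dir}")
```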
4 changes: 2 additions & 2 deletions llm/utils/model_requirements.txt

```diff
@@ -1,5 +1,5 @@
 torch==2.0.1
-tokenizers==0.13.3
-transformers==4.33.0
+tokenizers==0.15.0
+transformers==4.36.0
 accelerate==0.22.0
 einops==0.6.1
```
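The `tokenizers` and `transformers` pins move together here: transformers releases declare a compatible tokenizers range, so bumping one without the other typically breaks installs. A small sketch for checking that an environment matches the new pins (the pin values are copied from this diff; everything else is standard library):

```python
from importlib.metadata import PackageNotFoundError, version

# Pins copied from llm/utils/model_requirements.txt after this change.
PINS = {
    "torch": "2.0.1",
    "tokenizers": "0.15.0",
    "transformers": "4.36.0",
    "accelerate": "0.22.0",
    "einops": "0.6.1",
}

for pkg, want in PINS.items():
    try:
        have = version(pkg)
    except PackageNotFoundError:
        print(f"{pkg}: not installed (want {want})")
        continue
    status = "ok" if have == want else f"mismatch (want {want})"
    print(f"{pkg}=={have}: {status}")
```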
