Merge pull request #45 from nutanix/main
merge main into release v2
johnugeorge authored Dec 13, 2023
2 parents f0ab70f + c62f659 commit dc83ae6
Showing 4 changed files with 37 additions and 31 deletions.
2 changes: 1 addition & 1 deletion README.md

```diff
@@ -12,7 +12,7 @@ This new solution includes:
 - The management interface for enhanced terminal UI or standard CLI
 - Support for a curated set of LLMs including Llama2, Falcon and MPT
 
-Refer to the official [GPT-in-a-Box Documentation](https://opendocs.nutanix.com/gpt-in-a-box/kubernetes/getting_started/) to deploy and validate the inference server on Kubernetes cluster
+Refer to the official [GPT-in-a-Box Documentation](https://opendocs.nutanix.com/gpt-in-a-box/overview/) to deploy and validate the inference server on Kubernetes cluster
 
 ### License
 All source code and other contents in this repository are covered by the Nutanix License and Services Agreement, which is located at https://www.nutanix.com/legal/eula
```
14 changes: 5 additions & 9 deletions llm/generate.py

```diff
@@ -242,19 +242,15 @@ class with relevant information.
         )
     else:
         print(
-            "## If you want to create a model archive file with the supported models, "
+            "## If you want to create a model archive file for supported models, "
            "make sure you're model name is present in the below : "
         )
         print(list(models.keys()))
         print(
-            "If you want to create a model archive file for a custom model, there "
-            "are two methods:\n"
-            "1. If you have already downloaded the custom model files, please include"
-            " the --skip_download flag and provide the model_path directory which contains "
-            "the model files.\n"
-            "2. If you need to download the model files, provide the HuggingFace "
-            "repository ID along with a model_path driectory where the model "
-            "files are to be downloaded."
+            "\nIf you want to create a model archive file for"
+            " either a Custom Model or other HuggingFace models, "
+            "refer to the official GPT-in-a-Box documentation: "
+            "https://opendocs.nutanix.com/gpt-in-a-box/overview/"
         )
         sys.exit(1)
 
```
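The rewritten else-branch now points custom-model users at the documentation instead of describing the two download methods inline. A minimal, self-contained sketch of that error path follows; the `models` dict, the hard-coded config path, and the example model name are assumptions inferred from this diff, not the full source:

```python
import json
import sys

# Assumed: the registry of supported models lives in llm/model_config.json,
# keyed by model name (see the model_config.json diff below).
with open("llm/model_config.json", encoding="utf-8") as f:
    models = json.load(f)

model_name = "my_custom_model"  # hypothetical user input
if model_name not in models:
    print(
        "## If you want to create a model archive file for supported models, "
        "make sure your model name is present in the below : "
    )
    print(list(models.keys()))
    print(
        "\nIf you want to create a model archive file for"
        " either a Custom Model or other HuggingFace models, "
        "refer to the official GPT-in-a-Box documentation: "
        "https://opendocs.nutanix.com/gpt-in-a-box/overview/"
    )
    sys.exit(1)
```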
48 changes: 29 additions & 19 deletions llm/model_config.json

```diff
@@ -1,7 +1,7 @@
 {
     "mpt_7b": {
         "repo_id": "mosaicml/mpt-7b",
-        "repo_version": "00f72b21dd089db80c7fb50cb606996751500f6d",
+        "repo_version": "ada218f9a93b5f1c6dce48a4cc9ff01fcba431e7",
         "handler": "handler.py",
         "model_params":{
             "temperature" : 0.7,
@@ -13,8 +13,7 @@
             "batch_size" : 1,
             "max_batch_delay" : 200,
             "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        }
     },
     "falcon_7b": {
         "repo_id": "tiiuae/falcon-7b",
@@ -30,8 +29,7 @@
             "batch_size" : 1,
             "max_batch_delay" : 200,
             "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        }
     },
     "llama2_7b": {
         "repo_id": "meta-llama/Llama-2-7b-hf",
@@ -47,24 +45,36 @@
             "batch_size" : 1,
             "max_batch_delay" : 200,
             "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        }
     },
-    "phi-1_5": {
-        "repo_id": "microsoft/phi-1_5",
-        "repo_version": "b6a7e2fe15c21f5847279f23e280cc5a0e7049ef",
-        "handler": "handler.py",
-        "registration_params":{
-            "batch_size" : 1,
-            "max_batch_delay" : 200,
-            "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
-    },
     "gpt2": {
         "repo_id": "gpt2",
         "repo_version": "11c5a3d5811f50298f278a704980280950aedb10",
         "handler": "handler.py",
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        "registration_params": {
+            "batch_size": 1,
+            "max_batch_delay": 200,
+            "response_timeout": 2000
+        }
+    },
+    "codellama_7b_python": {
+        "repo_id": "codellama/CodeLlama-7b-Python-hf",
+        "repo_version": "7ee7b6beb0dece09b0431ea46c03bc1724e21572",
+        "handler": "handler.py",
+        "registration_params": {
+            "batch_size": 1,
+            "max_batch_delay": 200,
+            "response_timeout": 2000
+        }
+    },
+    "llama2_7b_chat": {
+        "repo_id": "meta-llama/Llama-2-7b-chat-hf",
+        "repo_version": "94b07a6e30c3292b8265ed32ffdeccfdadf434a8",
+        "handler": "handler.py",
+        "registration_params": {
+            "batch_size": 1,
+            "max_batch_delay": 200,
+            "response_timeout": 2000
+        }
     }
 }
```
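Each entry pins `repo_version` to a full Hugging Face commit hash, keeping downloads reproducible as upstream repositories change; this commit bumps the `mpt_7b` pin, drops `phi-1_5`, removes the per-model `gpu_type` lists, gives every remaining model explicit `registration_params`, and adds `codellama_7b_python` and `llama2_7b_chat`. A hedged sketch of how such a pin can be consumed, using `huggingface_hub.snapshot_download` (a real API that accepts a `revision` argument; the loading code around it is illustrative, not the repository's own):

```python
import json

from huggingface_hub import snapshot_download

with open("llm/model_config.json", encoding="utf-8") as f:
    models = json.load(f)

entry = models["gpt2"]  # ungated model, downloadable without a token
# revision= pins the download to the exact commit recorded in repo_version,
# so later upstream commits cannot change what gets fetched.
local_dir = snapshot_download(
    repo_id=entry["repo_id"],        # "gpt2"
    revision=entry["repo_version"],  # "11c5a3d5811f50298f278a704980280950aedb10"
)
print(f"Model files are in {local_dir}")
```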
4 changes: 2 additions & 2 deletions llm/utils/model_requirements.txt

```diff
@@ -1,5 +1,5 @@
 torch==2.0.1
-tokenizers==0.13.3
-transformers==4.33.0
+tokenizers==0.15.0
+transformers==4.36.0
 accelerate==0.22.0
 einops==0.6.1
```
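The `tokenizers` and `transformers` pins move together here: transformers releases declare a compatible tokenizers range, so bumping one without the other typically breaks installs. A small sketch for checking that an environment matches the new pins (the pin values are copied from this diff; everything else is standard library):

```python
from importlib.metadata import PackageNotFoundError, version

# Pins copied from llm/utils/model_requirements.txt after this change.
PINS = {
    "torch": "2.0.1",
    "tokenizers": "0.15.0",
    "transformers": "4.36.0",
    "accelerate": "0.22.0",
    "einops": "0.6.1",
}

for pkg, want in PINS.items():
    try:
        have = version(pkg)
    except PackageNotFoundError:
        print(f"{pkg}: not installed (want {want})")
        continue
    status = "ok" if have == want else f"mismatch (want {want})"
    print(f"{pkg}=={have}: {status}")
```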
