Skip to content

Commit

Permalink
Removed 16-bit precision option
Browse files Browse the repository at this point in the history
  • Loading branch information
saileshd1402 committed Jan 4, 2024
1 parent 5a5bd2b commit 85b6eda
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 10 deletions.
5 changes: 2 additions & 3 deletions llm/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,8 @@ def initialize(self, context: ts.context.Context):
self.tokenizer.padding_side = "left"
logger.info("Tokenizer loaded successfully")

quantize_bits = 16
if os.environ.get("NAI_QUANTIZATION"):
quantize_bits = int(self.get_env_value("NAI_QUANTIZATION"))
quantize_bits = self.get_env_value("NAI_QUANTIZATION")
quantize_bits = int(quantize_bits) if quantize_bits else quantize_bits

if quantize_bits == 4:
bnb_config = transformers.BitsAndBytesConfig(
Expand Down
12 changes: 6 additions & 6 deletions llm/kubeflow_inference_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,10 +387,10 @@ def execute(params: argparse.Namespace) -> None:
model_info["repo_id"] = model_params["repo_id"]
model_info["repo_version"] = check_if_valid_version(model_info, mount_path)

if quantize_bits not in [4, 8, 16]:
print("## Quantization precision bits should be either 4, 8 or 16")
if quantize_bits and int(quantize_bits) not in [4, 8]:
print("## Quantization precision bits should be either 4 or 8")
sys.exit(1)
elif quantize_bits in [4, 8] and not deployment_resources["gpus"]:
elif quantize_bits and deployment_resources["gpus"]:
print("## BitsAndBytes Quantization requires GPUs")
sys.exit(1)
else:
Expand Down Expand Up @@ -450,9 +450,9 @@ def execute(params: argparse.Namespace) -> None:
)
parser.add_argument(
"--quantize_bits",
type=int,
default=16,
help="BitsAndBytes Quantization Precision (4, 8 or 16)",
type=str,
default="",
help="BitsAndBytes Quantization Precision (4 or 8)",
)
# Parse the command-line arguments
args = parser.parse_args()
Expand Down
2 changes: 1 addition & 1 deletion llm/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ function helpFunction()
echo -e "\t-g Number of gpus to be used to execute. Set 0 to use cpu"
echo -e "\t-v Commit id of the HuggingFace Repo."
echo -e "\t-t Your HuggingFace token (Required only for LLAMA2 model)."
echo -e "\t-q BitsAndBytes Quantization Precision (4, 8 or 16)"
echo -e "\t-q BitsAndBytes Quantization Precision (4 or 8)"
exit 1 # Exit script after printing help
}

Expand Down

0 comments on commit 85b6eda

Please sign in to comment.