From e00750e2b24b433d157b514725a69ed4e0e58f70 Mon Sep 17 00:00:00 2001
From: Konrad Zawora
Date: Mon, 7 Oct 2024 09:30:12 +0200
Subject: [PATCH] Use BF16 on HPU by default (#361)

We don't *officially* support FP16, and for the most part we use BF16
wherever we can. This removes the need to specify `--dtype bfloat16`:
when `dtype` is not provided (i.e. it is `auto`) and the model's default
data type is `float16`, we cast it to `bfloat16` on HPU.
---
 vllm/config.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/vllm/config.py b/vllm/config.py
index 786ed1586a3ea..b3329f1c449ff 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1635,6 +1635,13 @@ def _get_and_verify_dtype(
                 torch_dtype = torch.float16
             else:
                 torch_dtype = config_dtype
+
+            if current_platform.is_hpu() and config_dtype == torch.float16:
+                logger.info(
+                    "For HPU, we cast models to bfloat16 instead of "
+                    "using float16 by default. Please specify `dtype` if you "
+                    "want to use float16.")
+                torch_dtype = torch.bfloat16
         else:
             if dtype not in _STR_DTYPE_TO_TORCH_DTYPE:
                 raise ValueError(f"Unknown dtype: {dtype}")
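
For reference, below is a minimal, self-contained sketch of the dtype-resolution
behavior this patch introduces. It mirrors the logic added to
`_get_and_verify_dtype` in vllm/config.py, but the function name
`resolve_auto_dtype` and the plain `is_hpu` flag are illustrative stand-ins for
vLLM's internal `current_platform.is_hpu()` check and `logger`; it is not the
actual vLLM API.

import torch


def resolve_auto_dtype(config_dtype: torch.dtype, is_hpu: bool) -> torch.dtype:
    """Sketch of the dtype picked when `dtype` is left at 'auto'."""
    if config_dtype == torch.float32:
        # Existing behavior: float32 checkpoints are loaded as float16.
        torch_dtype = torch.float16
    else:
        torch_dtype = config_dtype

    # Behavior added by this patch: on HPU, float16 checkpoints default to
    # bfloat16 unless the user explicitly requests float16 via `dtype`.
    if is_hpu and config_dtype == torch.float16:
        print("For HPU, casting to bfloat16 by default; "
              "pass `dtype` explicitly to keep float16.")
        torch_dtype = torch.bfloat16
    return torch_dtype


if __name__ == "__main__":
    assert resolve_auto_dtype(torch.float16, is_hpu=True) == torch.bfloat16
    assert resolve_auto_dtype(torch.float16, is_hpu=False) == torch.float16
    assert resolve_auto_dtype(torch.float32, is_hpu=True) == torch.float16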