From 95a855e212412e2a835511dacb3776f9ed064fe1 Mon Sep 17 00:00:00 2001 From: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:11:09 +0100 Subject: [PATCH] Deprecate quanto and switch to optimum-quanto (#35001) * deprecate quanto * fix style --- src/transformers/cache_utils.py | 22 ------------------- src/transformers/generation/utils.py | 3 +-- src/transformers/integrations/quanto.py | 7 +----- .../quantizers/quantizer_quanto.py | 13 +---------- src/transformers/utils/__init__.py | 1 - src/transformers/utils/import_utils.py | 7 ------ 6 files changed, 3 insertions(+), 50 deletions(-) diff --git a/src/transformers/cache_utils.py b/src/transformers/cache_utils.py index 9d4d90f11221db..23f2177b25d529 100644 --- a/src/transformers/cache_utils.py +++ b/src/transformers/cache_utils.py @@ -12,7 +12,6 @@ from .utils import ( is_hqq_available, is_optimum_quanto_available, - is_quanto_available, is_torchdynamo_compiling, logging, ) @@ -790,17 +789,6 @@ def __init__(self, cache_config: CacheConfig) -> None: f"You need optimum-quanto package version to be greater or equal than 0.2.5 to use `QuantoQuantizedCache`. Detected version {optimum_quanto_version}." ) from optimum.quanto import MaxOptimizer, qint2, qint4 - elif is_quanto_available(): - logger.warning_once( - "Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`" - ) - quanto_version = version.parse(importlib.metadata.version("quanto")) - if quanto_version < version.parse("0.2.0"): - raise ImportError( - f"You need quanto package version to be greater or equal than 0.2.0 to use `QuantoQuantizedCache`. Detected version {quanto_version}. " - f"Since quanto will be deprecated, please install optimum-quanto instead with `pip install -U optimum-quanto`" - ) - from quanto import MaxOptimizer, qint2, qint4 if self.nbits not in [2, 4]: raise ValueError(f"`nbits` for `quanto` backend has to be one of [`2`, `4`] but got {self.nbits}") @@ -824,16 +812,6 @@ def _quantize(self, tensor, axis): scale, zeropoint = self.optimizer(tensor, self.qtype, axis, self.q_group_size) qtensor = quantize_weight(tensor, self.qtype, axis, scale, zeropoint, self.q_group_size) return qtensor - elif is_quanto_available(): - logger.warning_once( - "Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`" - ) - from quanto import AffineQuantizer - - scale, zeropoint = self.optimizer(tensor, self.qtype.bits, axis, self.q_group_size) - qtensor = AffineQuantizer.apply(tensor, self.qtype, axis, self.q_group_size, scale, zeropoint) - - return qtensor def _dequantize(self, qtensor): return qtensor.dequantize() diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py index 5ef0c0eb81c87a..015cbebaa8e5dc 100644 --- a/src/transformers/generation/utils.py +++ b/src/transformers/generation/utils.py @@ -45,7 +45,6 @@ is_accelerate_available, is_hqq_available, is_optimum_quanto_available, - is_quanto_available, is_torchdynamo_compiling, logging, ) @@ -1787,7 +1786,7 @@ def _prepare_cache_for_generation( ) cache_class = QUANT_BACKEND_CLASSES_MAPPING[cache_config.backend] - if cache_config.backend == "quanto" and not (is_optimum_quanto_available() or is_quanto_available()): + if cache_config.backend == "quanto" and not is_optimum_quanto_available(): raise ImportError( "You need to install optimum-quanto in order to use KV cache quantization with optimum-quanto backend. " "Please install it via with `pip install optimum-quanto`" diff --git a/src/transformers/integrations/quanto.py b/src/transformers/integrations/quanto.py index 27b32de63bfe55..1c5702321937da 100644 --- a/src/transformers/integrations/quanto.py +++ b/src/transformers/integrations/quanto.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..utils import is_optimum_quanto_available, is_quanto_available, is_torch_available, logging +from ..utils import is_optimum_quanto_available, is_torch_available, logging if is_torch_available(): @@ -50,11 +50,6 @@ def replace_with_quanto_layers( if is_optimum_quanto_available(): from optimum.quanto import QLayerNorm, QLinear, qfloat8, qint2, qint4, qint8 - elif is_quanto_available(): - logger.warning_once( - "Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`" - ) - from quanto import QLayerNorm, QLinear, qfloat8, qint2, qint4, qint8 w_mapping = {"float8": qfloat8, "int8": qint8, "int4": qint4, "int2": qint2} a_mapping = {None: None, "float8": qfloat8, "int8": qint8} diff --git a/src/transformers/quantizers/quantizer_quanto.py b/src/transformers/quantizers/quantizer_quanto.py index 4027654dc22162..d91019dea15226 100644 --- a/src/transformers/quantizers/quantizer_quanto.py +++ b/src/transformers/quantizers/quantizer_quanto.py @@ -26,7 +26,6 @@ from ..utils import ( is_accelerate_available, is_optimum_quanto_available, - is_quanto_available, is_torch_available, logging, ) @@ -63,7 +62,7 @@ def post_init(self): ) def validate_environment(self, *args, **kwargs): - if not (is_optimum_quanto_available() or is_quanto_available()): + if not is_optimum_quanto_available(): raise ImportError( "Loading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)" ) @@ -91,11 +90,6 @@ def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype": def update_missing_keys(self, model, missing_keys: List[str], prefix: str) -> List[str]: if is_optimum_quanto_available(): from optimum.quanto import QModuleMixin - elif is_quanto_available(): - logger.warning_once( - "Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`" - ) - from quanto import QModuleMixin not_missing_keys = [] for name, module in model.named_modules(): @@ -122,11 +116,6 @@ def check_quantized_param( """ if is_optimum_quanto_available(): from optimum.quanto import QModuleMixin - elif is_quanto_available(): - logger.warning_once( - "Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`" - ) - from quanto import QModuleMixin device_map = kwargs.get("device_map", None) param_device = kwargs.get("param_device", None) diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index f7e962bec346fb..08d23e0e6a5d41 100755 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -175,7 +175,6 @@ is_pytesseract_available, is_pytest_available, is_pytorch_quantization_available, - is_quanto_available, is_rjieba_available, is_sacremoses_available, is_safetensors_available, diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index 2ce4bd7bc778da..32a647594741dd 100755 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -997,13 +997,6 @@ def is_auto_awq_available(): return _auto_awq_available -def is_quanto_available(): - logger.warning_once( - "Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`" - ) - return _quanto_available - - def is_optimum_quanto_available(): # `importlib.metadata.version` doesn't work with `optimum.quanto`, need to put `optimum_quanto` return _is_optimum_quanto_available