Skip to content

Commit

Permalink
Deprecate quanto and switch to optimum-quanto (huggingface#35001)
Browse files: browse the repository at this point in the history.
* deprecate quanto

* fix style
  • Loading branch information
MekkCyber authored Dec 5, 2024
1 parent 482cb28 commit 95a855e
Show file tree
Hide file tree
Showing 6 changed files with 3 additions and 50 deletions.
22 changes: 0 additions & 22 deletions src/transformers/cache_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from .utils import (
is_hqq_available,
is_optimum_quanto_available,
is_quanto_available,
is_torchdynamo_compiling,
logging,
)
Expand Down Expand Up @@ -790,17 +789,6 @@ def __init__(self, cache_config: CacheConfig) -> None:
f"You need optimum-quanto package version to be greater or equal than 0.2.5 to use `QuantoQuantizedCache`. Detected version {optimum_quanto_version}."
)
from optimum.quanto import MaxOptimizer, qint2, qint4
elif is_quanto_available():
logger.warning_once(
"Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`"
)
quanto_version = version.parse(importlib.metadata.version("quanto"))
if quanto_version < version.parse("0.2.0"):
raise ImportError(
f"You need quanto package version to be greater or equal than 0.2.0 to use `QuantoQuantizedCache`. Detected version {quanto_version}. "
f"Since quanto will be deprecated, please install optimum-quanto instead with `pip install -U optimum-quanto`"
)
from quanto import MaxOptimizer, qint2, qint4

if self.nbits not in [2, 4]:
raise ValueError(f"`nbits` for `quanto` backend has to be one of [`2`, `4`] but got {self.nbits}")
Expand All @@ -824,16 +812,6 @@ def _quantize(self, tensor, axis):
scale, zeropoint = self.optimizer(tensor, self.qtype, axis, self.q_group_size)
qtensor = quantize_weight(tensor, self.qtype, axis, scale, zeropoint, self.q_group_size)
return qtensor
elif is_quanto_available():
logger.warning_once(
"Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`"
)
from quanto import AffineQuantizer

scale, zeropoint = self.optimizer(tensor, self.qtype.bits, axis, self.q_group_size)
qtensor = AffineQuantizer.apply(tensor, self.qtype, axis, self.q_group_size, scale, zeropoint)

return qtensor

def _dequantize(self, qtensor):
return qtensor.dequantize()
Expand Down
3 changes: 1 addition & 2 deletions src/transformers/generation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
is_accelerate_available,
is_hqq_available,
is_optimum_quanto_available,
is_quanto_available,
is_torchdynamo_compiling,
logging,
)
Expand Down Expand Up @@ -1787,7 +1786,7 @@ def _prepare_cache_for_generation(
)
cache_class = QUANT_BACKEND_CLASSES_MAPPING[cache_config.backend]

if cache_config.backend == "quanto" and not (is_optimum_quanto_available() or is_quanto_available()):
if cache_config.backend == "quanto" and not is_optimum_quanto_available():
raise ImportError(
"You need to install optimum-quanto in order to use KV cache quantization with optimum-quanto backend. "
"Please install it via with `pip install optimum-quanto`"
Expand Down
7 changes: 1 addition & 6 deletions src/transformers/integrations/quanto.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ..utils import is_optimum_quanto_available, is_quanto_available, is_torch_available, logging
from ..utils import is_optimum_quanto_available, is_torch_available, logging


if is_torch_available():
Expand Down Expand Up @@ -50,11 +50,6 @@ def replace_with_quanto_layers(

if is_optimum_quanto_available():
from optimum.quanto import QLayerNorm, QLinear, qfloat8, qint2, qint4, qint8
elif is_quanto_available():
logger.warning_once(
"Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`"
)
from quanto import QLayerNorm, QLinear, qfloat8, qint2, qint4, qint8

w_mapping = {"float8": qfloat8, "int8": qint8, "int4": qint4, "int2": qint2}
a_mapping = {None: None, "float8": qfloat8, "int8": qint8}
Expand Down
13 changes: 1 addition & 12 deletions src/transformers/quantizers/quantizer_quanto.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from ..utils import (
is_accelerate_available,
is_optimum_quanto_available,
is_quanto_available,
is_torch_available,
logging,
)
Expand Down Expand Up @@ -63,7 +62,7 @@ def post_init(self):
)

def validate_environment(self, *args, **kwargs):
if not (is_optimum_quanto_available() or is_quanto_available()):
if not is_optimum_quanto_available():
raise ImportError(
"Loading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)"
)
Expand Down Expand Up @@ -91,11 +90,6 @@ def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
def update_missing_keys(self, model, missing_keys: List[str], prefix: str) -> List[str]:
if is_optimum_quanto_available():
from optimum.quanto import QModuleMixin
elif is_quanto_available():
logger.warning_once(
"Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`"
)
from quanto import QModuleMixin

not_missing_keys = []
for name, module in model.named_modules():
Expand All @@ -122,11 +116,6 @@ def check_quantized_param(
"""
if is_optimum_quanto_available():
from optimum.quanto import QModuleMixin
elif is_quanto_available():
logger.warning_once(
"Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`"
)
from quanto import QModuleMixin

device_map = kwargs.get("device_map", None)
param_device = kwargs.get("param_device", None)
Expand Down
1 change: 0 additions & 1 deletion src/transformers/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@
is_pytesseract_available,
is_pytest_available,
is_pytorch_quantization_available,
is_quanto_available,
is_rjieba_available,
is_sacremoses_available,
is_safetensors_available,
Expand Down
7 changes: 0 additions & 7 deletions src/transformers/utils/import_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,13 +997,6 @@ def is_auto_awq_available():
return _auto_awq_available


def is_quanto_available():
    """Return the module-level ``_quanto_available`` flag.

    Before returning, a warning pointing users to ``optimum-quanto`` is sent
    through ``logger.warning_once``.

    Returns:
        The value of ``_quanto_available`` (defined outside this view).
    """
    logger.warning_once(
        "Importing from quanto will be deprecated in v4.47. Please install optimum-quanto instead `pip install optimum-quanto`"
    )
    return _quanto_available


def is_optimum_quanto_available():
    """Return the module-level ``_is_optimum_quanto_available`` flag.

    Returns:
        The value of ``_is_optimum_quanto_available`` (defined outside this
        view).
    """
    # `importlib.metadata.version` doesn't work with `optimum.quanto`, need to put `optimum_quanto`
    return _is_optimum_quanto_available
Expand Down

0 comments on commit 95a855e

Please sign in to comment.