diff --git a/CHANGELOG.md b/CHANGELOG.md index cdae62f2dc..f55be3b2d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,17 @@ ## [Unreleased] +## [3.5.0](https://github.com/OpenNMT/OpenNMT-py/tree/3.5.0) (2024-02-22) + +* Further improvements and fixes +* Support for AWQ models +* Add n_best for topp/topk generation +* Support MoE (Mixtral) inference +* Extend HF models converter +* Use flash_attn_with_kvcache for faster inference +* Add wikitext2 PPL computation +* Support for Phi-2 models + ## [3.4.3](https://github.com/OpenNMT/OpenNMT-py/tree/3.4.3) (2023-11-2) * Further improvements to beam search and decoding diff --git a/README.md b/README.md index 27b2258e72..98caff85be 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Otherwise you can just have a look at the [Quickstart](https://opennmt.net/OpenN ---- ## New: -* You will need Pytorch v2 preferably v2.1 which fixes some `scaled_dot_product_attention` issues +* You will need Pytorch v2 preferably v2.2 which fixes some `scaled_dot_product_attention` issues * LLM support with converters for: Llama (+ Mistral), OpenLlama, Redpajama, MPT-7B, Falcon. * Support for 8bit and 4bit quantization along with LoRA adapters, with or without checkpointing. * You can finetune 7B and 13B models on a single RTX 24GB with 4-bit quantization. 
diff --git a/onmt/__init__.py b/onmt/__init__.py index 45bdc13f53..25507cbad4 100644 --- a/onmt/__init__.py +++ b/onmt/__init__.py @@ -21,4 +21,4 @@ onmt.modules, ] -__version__ = "3.4.3" +__version__ = "3.5.0" diff --git a/setup.py b/setup.py index 9814bb1d2f..ac9211dd14 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ description="A python implementation of OpenNMT", long_description=long_description, long_description_content_type="text/markdown", - version="3.4.3", + version="3.5.0", packages=find_packages(), project_urls={ "Documentation": "http://opennmt.net/OpenNMT-py/", @@ -21,9 +21,9 @@ }, python_requires=">=3.8", install_requires=[ - "torch>=2.0.1,<2.2", + "torch>=2.0.1,<2.3", "configargparse", - "ctranslate2>=3.17,<4", + "ctranslate2>=3.24,<5", "tensorboard>=2.3", "flask", "waitress",