From 89f51996a0003c03c92eced4eb7708fee63977e8 Mon Sep 17 00:00:00 2001
From: "jakub.binkowski"
Date: Wed, 28 Aug 2024 10:09:02 +0000
Subject: [PATCH] Add bielik v2

---
 configs/model/Bielik-11B-v2.2-Instruct.yaml | 11 +++++++++++
 dvc.yaml                                    |  4 +++-
 juddges/models/factory.py                   |  6 ++++--
 3 files changed, 18 insertions(+), 3 deletions(-)
 create mode 100644 configs/model/Bielik-11B-v2.2-Instruct.yaml

diff --git a/configs/model/Bielik-11B-v2.2-Instruct.yaml b/configs/model/Bielik-11B-v2.2-Instruct.yaml
new file mode 100644
index 0000000..9d4016b
--- /dev/null
+++ b/configs/model/Bielik-11B-v2.2-Instruct.yaml
@@ -0,0 +1,11 @@
+name: speakleash/Bielik-11B-v2.2-Instruct
+tokenizer_name: ${.name}
+
+adapter_path: null
+
+max_seq_length: 7_900
+batch_size: 1
+padding: longest
+use_4bit: true
+
+use_unsloth: true
diff --git a/dvc.yaml b/dvc.yaml
index 54e15c7..d6a892f 100644
--- a/dvc.yaml
+++ b/dvc.yaml
@@ -100,6 +100,7 @@ stages:
         - Unsloth-Mistral-7B-Instruct-v0.3
         - Unsloth-Mistral-Nemo-Instruct-2407
         - Bielik-7B-Instruct-v0.1
+        - Bielik-11B-v2.2-Instruct
     cmd: >-
       PYTHONPATH=. python scripts/sft/fine_tune_llm.py
       dataset=${item.dataset}
@@ -126,6 +127,7 @@ stages:
         - Bielik-7B-Instruct-v0.1-fine-tuned
         - trurl-13B-academic
         - qra-13b
+        - Bielik-11B-v2.2-Instruct
       seed: ${seeds}
     cmd: >-
       PYTHONPATH=. python scripts/sft/predict.py
@@ -222,7 +224,7 @@ stages:
         - Unsloth-Mistral-Nemo-Instruct-2407
         - Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en
         # - open_ai_gpt-4o
-        - open_ai_gpt-4o-mini
+        # - open_ai_gpt-4o-mini
       seed: ${seeds}
     cmd: >-
       PYTHONPATH=. python scripts/sft/evaluate.py
diff --git a/juddges/models/factory.py b/juddges/models/factory.py
index cec14b1..5fc4423 100644
--- a/juddges/models/factory.py
+++ b/juddges/models/factory.py
@@ -18,10 +18,12 @@ class ModelForGeneration:
 def get_model(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
     if "llama" in llm_config.name.lower():
         return get_llama_3(llm_config, **kwargs)
+    elif llm_config.name.lower().startswith("speakleash/bielik-11b-v2"):
+        return get_model_with_default_setup(llm_config, **kwargs)
     elif any(mistral_model in llm_config.name.lower() for mistral_model in ("mistral", "bielik")):
         return get_mistral(llm_config, **kwargs)
     elif any(llama_2_model in llm_config.name.lower() for llama_2_model in ("trurl", "qra")):
-        return get_llama_2_based(llm_config, **kwargs)
+        return get_model_with_default_setup(llm_config, **kwargs)
     else:
         raise ValueError(f"Model: {llm_config} not yet handled or doesn't exists.")
 
@@ -51,7 +53,7 @@ def get_mistral(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
     )
 
 
-def get_llama_2_based(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
+def get_model_with_default_setup(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
     model, tokenizer = _get_model_tokenizer(llm_config, **kwargs)
     tokenizer.padding_side = "left"
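
For orientation, a minimal sketch of how the new config is meant to resolve through the
get_model() dispatch above. It assumes OmegaConf-style loading (implied by the ${.name}
interpolation in the YAML); the loading calls and variable names are illustrative, not
part of the patch:

    # Illustrative sketch only, not part of the patch.
    from omegaconf import OmegaConf

    from juddges.models.factory import get_model

    cfg = OmegaConf.load("configs/model/Bielik-11B-v2.2-Instruct.yaml")
    OmegaConf.resolve(cfg)  # tokenizer_name: ${.name} -> "speakleash/Bielik-11B-v2.2-Instruct"

    # The lowered name starts with "speakleash/bielik-11b-v2", so the new
    # startswith() branch fires and routes to get_model_with_default_setup(),
    # while Bielik v1 ("Bielik-7B-Instruct-v0.1") still falls through to the
    # ("mistral", "bielik") substring check and get_mistral().
    model_for_generation = get_model(cfg)

Note that the branch order matters: the Bielik v2 check must precede the mistral/bielik
substring test, otherwise v2 names (which also contain "bielik") would route to
get_mistral() instead.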