From 89f51996a0003c03c92eced4eb7708fee63977e8 Mon Sep 17 00:00:00 2001
From: "jakub.binkowski"
Date: Wed, 28 Aug 2024 10:09:02 +0000
Subject: [PATCH] Add bielik v2

---
 configs/model/Bielik-11B-v2.2-Instruct.yaml | 11 +++++++++++
 dvc.yaml                                    |  4 +++-
 juddges/models/factory.py                   |  6 ++++--
 3 files changed, 18 insertions(+), 3 deletions(-)
 create mode 100644 configs/model/Bielik-11B-v2.2-Instruct.yaml

diff --git a/configs/model/Bielik-11B-v2.2-Instruct.yaml b/configs/model/Bielik-11B-v2.2-Instruct.yaml
new file mode 100644
index 0000000..9d4016b
--- /dev/null
+++ b/configs/model/Bielik-11B-v2.2-Instruct.yaml
@@ -0,0 +1,11 @@
+name: speakleash/Bielik-11B-v2.2-Instruct
+tokenizer_name: ${.name}
+
+adapter_path: null
+
+max_seq_length: 7_900
+batch_size: 1
+padding: longest
+use_4bit: true
+
+use_unsloth: true
diff --git a/dvc.yaml b/dvc.yaml
index 54e15c7..d6a892f 100644
--- a/dvc.yaml
+++ b/dvc.yaml
@@ -100,6 +100,7 @@ stages:
         - Unsloth-Mistral-7B-Instruct-v0.3
         - Unsloth-Mistral-Nemo-Instruct-2407
         - Bielik-7B-Instruct-v0.1
+        - Bielik-11B-v2.2-Instruct
     cmd: >-
       PYTHONPATH=. python scripts/sft/fine_tune_llm.py
       dataset=${item.dataset}
@@ -126,6 +127,7 @@ stages:
         - Bielik-7B-Instruct-v0.1-fine-tuned
         - trurl-13B-academic
         - qra-13b
+        - Bielik-11B-v2.2-Instruct
       seed: ${seeds}
     cmd: >-
       PYTHONPATH=. python scripts/sft/predict.py
@@ -222,7 +224,7 @@ stages:
         - Unsloth-Mistral-Nemo-Instruct-2407
         - Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en
         # - open_ai_gpt-4o
-        - open_ai_gpt-4o-mini
+        # - open_ai_gpt-4o-mini
       seed: ${seeds}
     cmd: >-
       PYTHONPATH=. python scripts/sft/evaluate.py
diff --git a/juddges/models/factory.py b/juddges/models/factory.py
index cec14b1..5fc4423 100644
--- a/juddges/models/factory.py
+++ b/juddges/models/factory.py
@@ -18,10 +18,12 @@ class ModelForGeneration:
 def get_model(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
     if "llama" in llm_config.name.lower():
         return get_llama_3(llm_config, **kwargs)
+    elif llm_config.name.lower().startswith("speakleash/bielik-11b-v2"):
+        return get_model_with_default_setup(llm_config, **kwargs)
     elif any(mistral_model in llm_config.name.lower() for mistral_model in ("mistral", "bielik")):
         return get_mistral(llm_config, **kwargs)
     elif any(llama_2_model in llm_config.name.lower() for llama_2_model in ("trurl", "qra")):
-        return get_llama_2_based(llm_config, **kwargs)
+        return get_model_with_default_setup(llm_config, **kwargs)
     else:
         raise ValueError(f"Model: {llm_config} not yet handled or doesn't exists.")
 
@@ -51,7 +53,7 @@ def get_mistral(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
     )
 
 
-def get_llama_2_based(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
+def get_model_with_default_setup(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
     model, tokenizer = _get_model_tokenizer(llm_config, **kwargs)
     tokenizer.padding_side = "left"
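
For orientation, a minimal sketch of how the new config is meant to resolve through the
get_model() dispatch above. It assumes OmegaConf-style loading (implied by the ${.name}
interpolation in the YAML); the loading calls and variable names are illustrative, not
part of the patch:

    # Illustrative sketch only, not part of the patch.
    from omegaconf import OmegaConf

    from juddges.models.factory import get_model

    cfg = OmegaConf.load("configs/model/Bielik-11B-v2.2-Instruct.yaml")
    OmegaConf.resolve(cfg)  # tokenizer_name: ${.name} -> "speakleash/Bielik-11B-v2.2-Instruct"

    # The lowered name starts with "speakleash/bielik-11b-v2", so the new
    # startswith() branch fires and routes to get_model_with_default_setup(),
    # while Bielik v1 ("Bielik-7B-Instruct-v0.1") still falls through to the
    # ("mistral", "bielik") substring check and get_mistral().
    model_for_generation = get_model(cfg)

Note that the branch order matters: the Bielik v2 check must precede the mistral/bielik
substring test, otherwise v2 names (which also contain "bielik") would route to
get_mistral() instead.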