Skip to content

Commit

Permalink
Add bielik v2
Browse files Browse the repository at this point in the history
  • Loading branch information
binkjakub committed Aug 28, 2024
1 parent 6e18f71 commit 89f5199
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 3 deletions.
11 changes: 11 additions & 0 deletions configs/model/Bielik-11B-v2.2-Instruct.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Model configuration for the speakleash/Bielik-11B-v2.2-Instruct checkpoint.
name: speakleash/Bielik-11B-v2.2-Instruct
# Relative interpolation: the tokenizer is loaded under the same name as the model.
tokenizer_name: ${.name}

# No LoRA/PEFT adapter is applied by default.
adapter_path: null

# NOTE(review): under strict YAML 1.2, 7_900 (with underscore) parses as a
# string, not an int — presumably the config loader coerces it; confirm.
max_seq_length: 7_900
batch_size: 1
padding: longest
# Load weights with 4-bit quantization.
use_4bit: true

# Use the Unsloth runtime for this model.
use_unsloth: true
4 changes: 3 additions & 1 deletion dvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ stages:
- Unsloth-Mistral-7B-Instruct-v0.3
- Unsloth-Mistral-Nemo-Instruct-2407
- Bielik-7B-Instruct-v0.1
- Bielik-11B-v2.2-Instruct
cmd: >-
PYTHONPATH=. python scripts/sft/fine_tune_llm.py
dataset=${item.dataset}
Expand All @@ -126,6 +127,7 @@ stages:
- Bielik-7B-Instruct-v0.1-fine-tuned
- trurl-13B-academic
- qra-13b
- Bielik-11B-v2.2-Instruct
seed: ${seeds}
cmd: >-
PYTHONPATH=. python scripts/sft/predict.py
Expand Down Expand Up @@ -222,7 +224,7 @@ stages:
- Unsloth-Mistral-Nemo-Instruct-2407
- Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en
# - open_ai_gpt-4o
- open_ai_gpt-4o-mini
# - open_ai_gpt-4o-mini
seed: ${seeds}
cmd: >-
PYTHONPATH=. python scripts/sft/evaluate.py
Expand Down
6 changes: 4 additions & 2 deletions juddges/models/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ class ModelForGeneration:
def get_model(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
    """Dispatch model/tokenizer construction based on the configured model name.

    Args:
        llm_config: Model configuration; its ``name`` field selects the loader.
        **kwargs: Extra keyword arguments forwarded to the selected loader.

    Returns:
        A ``ModelForGeneration`` produced by the family-specific loader.

    Raises:
        ValueError: If the model name matches none of the known families.
    """
    # Hoist the lowercased name — every branch below matches against it.
    model_name = llm_config.name.lower()
    # Branch order matters: the Bielik-11B-v2 prefix check must run before the
    # generic "bielik" substring match, which would otherwise route v2 models
    # to the Mistral-specific setup.
    if "llama" in model_name:
        return get_llama_3(llm_config, **kwargs)
    elif model_name.startswith("speakleash/bielik-11b-v2"):
        return get_model_with_default_setup(llm_config, **kwargs)
    elif any(mistral_model in model_name for mistral_model in ("mistral", "bielik")):
        return get_mistral(llm_config, **kwargs)
    elif any(llama_2_model in model_name for llama_2_model in ("trurl", "qra")):
        # Fix: the original branch contained two consecutive return statements —
        # a stale call to ``get_llama_2_based`` (renamed in this commit to
        # ``get_model_with_default_setup``) followed by an unreachable return.
        # Keep only the call to the current helper.
        return get_model_with_default_setup(llm_config, **kwargs)
    else:
        raise ValueError(f"Model: {llm_config} not yet handled or doesn't exists.")

Expand Down Expand Up @@ -51,7 +53,7 @@ def get_mistral(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
)


def get_llama_2_based(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
def get_model_with_default_setup(llm_config: LLMConfig, **kwargs: Any) -> ModelForGeneration:
model, tokenizer = _get_model_tokenizer(llm_config, **kwargs)
tokenizer.padding_side = "left"

Expand Down

0 comments on commit 89f5199

Please sign in to comment.