Skip to content

Commit

Permalink
code setup for sft on eng
Browse files Browse the repository at this point in the history
  • Loading branch information
Santosh Tirunagari committed Nov 26, 2024
1 parent eef9b30 commit 93acccc
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 11 deletions.
22 changes: 11 additions & 11 deletions configs/fine_tuning.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,19 @@ peft_args:
lora_alpha: 16
lora_dropout: 0 # Supports any, but = 0 is optimized
bias: "none" # Supports any, but = "none" is optimized
-target_modules:
-[
-"q_proj",
-"k_proj",
-"v_proj",
-"o_proj",
-"gate_proj",
-"up_proj",
-"down_proj",
-]
+target_modules: "all-linear"
+#[
+# "q_proj",
+# "k_proj",
+# "v_proj",
+# "o_proj",
+# "gate_proj",
+# "up_proj",
+# "down_proj",
+#]

truncate_context: True
-wandb_entity: graph-ml-lab-wust
+wandb_entity: ebi_literature
wandb_project: juddges-fine-tune

output_dir: data/experiments/fine-tune/${hydra:runtime.choices.model}/${hydra:runtime.choices.dataset}
Expand Down
11 changes: 11 additions & 0 deletions configs/model/Mistral-Nemo-Instruct-2407.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
+name: mistralai/Mistral-Nemo-Instruct-2407
+tokenizer_name: ${.name}
+
+adapter_path: null
+
+max_seq_length: 7_900 # can handle larger inputs, but set is equal to llama-3 for better comparison
+padding: longest
+batch_size: 1
+use_4bit: true
+
+use_unsloth: false
1 change: 1 addition & 0 deletions dvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ stages:
- Unsloth-Mistral-Nemo-Instruct-2407
- Bielik-7B-Instruct-v0.1
- Bielik-11B-v2.2-Instruct
+- Mistral-Nemo-Instruct-2407
cmd: >-
PYTHONPATH=. python scripts/sft/fine_tune_llm.py
dataset=${item.dataset}
Expand Down

0 comments on commit 93acccc

Please sign in to comment.