Finish sft experiments (#21)
* Added notebook for inspecting SFT results

* Add llama-unsloth to zero-shot eval

* Reproduce predictions at step 1500 of Unsloth-Mistral

* Add Bielik LLM
binkjakub authored Jun 3, 2024
1 parent 11c75ac commit 18d4ecd
Showing 11 changed files with 524 additions and 12 deletions.
8 changes: 8 additions & 0 deletions configs/model/Bielik-7B-Instruct-v0.1.yaml
@@ -0,0 +1,8 @@
name: speakleash/Bielik-7B-Instruct-v0.1
tokenizer_name: ${.name}

adapter_path: null

max_seq_length: 4_000
padding: longest
batch_size: 1
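A minimal sketch of how a zero-shot config like this could be consumed with plain Hugging Face transformers; the prompt text below is a placeholder and the repository's scripts/sft/predict.py may load the model differently:

```python
# Hedged illustration only; mirrors the config fields, not the actual predict script.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "speakleash/Bielik-7B-Instruct-v0.1"       # name
tokenizer = AutoTokenizer.from_pretrained(model_name)   # tokenizer_name: ${.name}
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

# padding: longest, max_seq_length: 4_000, batch_size: 1
batch = ["<placeholder court ruling text>"]
inputs = tokenizer(batch, padding="longest", truncation=True,
                   max_length=4_000, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=512)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```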
10 changes: 10 additions & 0 deletions configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml
@@ -0,0 +1,10 @@
name: unsloth/mistral-7b-instruct-v0.3-bnb-4bit
tokenizer_name: ${.name}

adapter_path: data/experiments/fine-tune/Unsloth-Mistral-7B-Instruct-v0.3/pl-court-instruct/checkpoint-1500

max_seq_length: 20_000
padding: longest
batch_size: 1

use_unsloth: true
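A hedged sketch of how the 4-bit Unsloth base model and the LoRA adapter at checkpoint-1500 might be combined for inference, assuming the unsloth and peft libraries; the actual loading code in the repository may differ:

```python
# Illustration under stated assumptions, not the project's loader.
from unsloth import FastLanguageModel
from peft import PeftModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-instruct-v0.3-bnb-4bit",  # name
    max_seq_length=20_000,                                    # max_seq_length
    load_in_4bit=True,
)
# Attach the LoRA weights saved at fine-tuning step 1500 (adapter_path).
model = PeftModel.from_pretrained(
    model,
    "data/experiments/fine-tune/Unsloth-Mistral-7B-Instruct-v0.3/"
    "pl-court-instruct/checkpoint-1500",
)
FastLanguageModel.for_inference(model)  # switch Unsloth into its faster inference mode
```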
1 change: 1 addition & 0 deletions data/experiments/fine-tune/Unsloth-Mistral-7B-Instruct-v0.3/.gitignore
@@ -0,0 +1 @@
/pl-court-instruct
3 changes: 3 additions & 0 deletions data/experiments/predict/pl-court-instruct/.gitignore
@@ -6,3 +6,6 @@
/outputs_Mistral-7B-Instruct-v0.2-fine-tuned.json
/outputs_Unsloth-Llama-3-8B-Instruct-fine-tuned.json
/outputs_Unsloth-Mistral-7B-Instruct-v0.3.json
/outputs_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json
/outputs_Unsloth-Llama-3-8B-Instruct.json
/outputs_Bielik-7B-Instruct-v0.1.json
12 changes: 12 additions & 0 deletions data/experiments/predict/pl-court-instruct/metrics_Bielik-7B-Instruct-v0.1.json
@@ -0,0 +1,12 @@
{
"full_text_chrf": 0.2468319535255432,
"field_chrf": {
"court_name": 0.7368742823600769,
"date": 0.7829525470733643,
"department_name": 0.626532793045044,
"judges": 0.30981674790382385,
"legal_bases": 0.3045749366283417,
"recorder": 0.5168337821960449,
"signature": 0.4849330484867096
}
}
12 changes: 12 additions & 0 deletions data/experiments/predict/pl-court-instruct/metrics_Unsloth-Llama-3-8B-Instruct.json
@@ -0,0 +1,12 @@
{
"full_text_chrf": 0.4385761320590973,
"field_chrf": {
"court_name": 0.8789530396461487,
"date": 0.9822721481323242,
"department_name": 0.9057374000549316,
"judges": 0.9149863123893738,
"legal_bases": 0.42645466327667236,
"recorder": 0.7640316486358643,
"signature": 0.7549777626991272
}
}
12 changes: 12 additions & 0 deletions data/experiments/predict/pl-court-instruct/metrics_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json
@@ -0,0 +1,12 @@
{
"full_text_chrf": 0.8193286061286926,
"field_chrf": {
"court_name": 0.9964265823364258,
"date": 0.9885857701301575,
"department_name": 0.9962303042411804,
"judges": 0.981475830078125,
"legal_bases": 0.7374544143676758,
"recorder": 0.9933416843414307,
"signature": 0.9780842661857605
}
}
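The chrF values above sit on a 0-1 scale. An illustrative sketch of how full_text_chrf and field_chrf could be computed with sacrebleu; the project's scripts/sft/evaluate.py may use a different metric implementation, and the prediction/reference variables below are placeholders:

```python
# Sketch only: chrF over raw generations and over parsed per-field values.
from sacrebleu.metrics import CHRF

chrf = CHRF()  # character n-gram F-score, reported by sacrebleu on a 0-100 scale

def chrf_score(predictions: list[str], references: list[str]) -> float:
    # corpus_score takes a list of hypotheses and a list of reference streams
    return chrf.corpus_score(predictions, [references]).score / 100.0

# Placeholder data standing in for model outputs and gold annotations.
raw_outputs = ['{"court_name": "Sad Okregowy w Warszawie", "date": "2020-01-15"}']
gold_outputs = ['{"court_name": "Sad Okregowy w Warszawie", "date": "2020-01-16"}']
parsed_preds = [{"court_name": "Sad Okregowy w Warszawie", "date": "2020-01-15"}]
parsed_golds = [{"court_name": "Sad Okregowy w Warszawie", "date": "2020-01-16"}]

full_text_chrf = chrf_score(raw_outputs, gold_outputs)
field_chrf = {
    field: chrf_score([p[field] for p in parsed_preds],
                      [g[field] for g in parsed_golds])
    for field in parsed_golds[0]
}
print(full_text_chrf, field_chrf)
```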
16 changes: 9 additions & 7 deletions data/experiments/predict/pl-court-instruct/metrics_summary.md
@@ -1,7 +1,9 @@
| llm | full_text_chrf | court_name | date | department_name | judges | legal_bases | recorder | signature |
|:---------------------------------------|-----------------:|-------------:|-------:|------------------:|---------:|--------------:|-----------:|------------:|
| Meta-Llama-3-8B-Instruct | 0.247 | 0.862 | 0.971 | 0.833 | 0.882 | 0.287 | 0.805 | 0.778 |
| Mistral-7B-Instruct-v0.2 | 0.432 | 0.839 | 0.922 | 0.850 | 0.879 | 0.333 | 0.837 | 0.145 |
| Mistral-7B-Instruct-v0.2-fine-tuned | 0.772 | 0.987 | 0.990 | 0.965 | 0.952 | 0.600 | 0.979 | 0.972 |
| Unsloth-Llama-3-8B-Instruct-fine-tuned | 0.828 | 0.995 | 0.989 | 0.986 | 0.977 | 0.601 | 0.993 | 0.994 |
| Unsloth-Mistral-7B-Instruct-v0.3 | 0.477 | 0.830 | 0.987 | 0.900 | 0.870 | 0.419 | 0.943 | 0.567 |
| llm | full_text_chrf | court_name | date | department_name | judges | legal_bases | recorder | signature |
|:--------------------------------------------|-----------------:|-------------:|-------:|------------------:|---------:|--------------:|-----------:|------------:|
| Meta-Llama-3-8B-Instruct | 0.247 | 0.862 | 0.971 | 0.833 | 0.882 | 0.287 | 0.805 | 0.778 |
| Mistral-7B-Instruct-v0.2 | 0.432 | 0.839 | 0.922 | 0.850 | 0.879 | 0.333 | 0.837 | 0.145 |
| Mistral-7B-Instruct-v0.2-fine-tuned | 0.772 | 0.987 | 0.990 | 0.965 | 0.952 | 0.600 | 0.979 | 0.972 |
| Unsloth-Llama-3-8B-Instruct | 0.439 | 0.879 | 0.982 | 0.906 | 0.915 | 0.426 | 0.764 | 0.755 |
| Unsloth-Llama-3-8B-Instruct-fine-tuned | 0.828 | 0.995 | 0.989 | 0.986 | 0.977 | 0.601 | 0.993 | 0.994 |
| Unsloth-Mistral-7B-Instruct-v0.3 | 0.477 | 0.830 | 0.987 | 0.900 | 0.870 | 0.419 | 0.943 | 0.567 |
| Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned | 0.819 | 0.996 | 0.989 | 0.996 | 0.981 | 0.737 | 0.993 | 0.978 |
141 changes: 139 additions & 2 deletions dvc.lock
@@ -320,8 +320,8 @@ stages:
outs:
- path: data/experiments/predict/pl-court-instruct/metrics_summary.md
hash: md5
md5: a72452f53099f61de9d653af1a596a3a
size: 1119
md5: 80c3922982cb8a41468063481dbf695c
size: 1484
[email protected]:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3.json
@@ -341,3 +341,140 @@
hash: md5
md5: 091b8888275600052dd2dcdd36a55588
size: 305
[email protected]:
cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned
deps:
- path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml
hash: md5
md5: 8e8b380ef9bc65715cb833ce104cda20
size: 256
- path: configs/predict.yaml
hash: md5
md5: e6b047cf62e612a32381d6221eb99b4e
size: 416
- path: scripts/sft/predict.py
hash: md5
md5: 69e4844a715c9c5c75e1127a06472ad4
size: 3148
outs:
- path:
data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json
hash: md5
md5: a4fda5774b367e8924cf07f3bf271922
size: 1834778
[email protected]:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json
deps:
- path:
data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json
hash: md5
md5: a4fda5774b367e8924cf07f3bf271922
size: 1834778
- path: scripts/sft/evaluate.py
hash: md5
md5: 5ee442a9a3525af7596bf24c3d724a1d
size: 570
outs:
- path:
data/experiments/predict/pl-court-instruct/metrics_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json
hash: md5
md5: 3b3589929112cb2f199044d240e87bcc
size: 305
predict@Unsloth-Llama-3-8B-Instruct:
cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Llama-3-8B-Instruct
deps:
- path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml
hash: md5
md5: e97bb2e6bf39f75edea7714d6ba58b77
size: 160
- path: configs/predict.yaml
hash: md5
md5: e6b047cf62e612a32381d6221eb99b4e
size: 416
- path: scripts/sft/predict.py
hash: md5
md5: 69e4844a715c9c5c75e1127a06472ad4
size: 3148
outs:
- path:
data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct.json
hash: md5
md5: df2f1d464152f87737c8ebb5b0673854
size: 2179383
evaluate@Unsloth-Llama-3-8B-Instruct:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct.json
deps:
- path:
data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct.json
hash: md5
md5: df2f1d464152f87737c8ebb5b0673854
size: 2179383
- path: scripts/sft/evaluate.py
hash: md5
md5: 5ee442a9a3525af7596bf24c3d724a1d
size: 570
outs:
- path:
data/experiments/predict/pl-court-instruct/metrics_Unsloth-Llama-3-8B-Instruct.json
hash: md5
md5: 521a731cc2c45d3eda0656a8e69d505b
size: 307
[email protected]:
cmd: PYTHONPATH=. python scripts/sft/predict.py model=Bielik-7B-Instruct-v0.1
deps:
- path: configs/model/Bielik-7B-Instruct-v0.1.yaml
hash: md5
md5: ea2309177451ac16db4c2c7a5b7aed3b
size: 140
- path: configs/predict.yaml
hash: md5
md5: e6b047cf62e612a32381d6221eb99b4e
size: 416
- path: scripts/sft/predict.py
hash: md5
md5: 69e4844a715c9c5c75e1127a06472ad4
size: 3148
outs:
- path: data/experiments/predict/pl-court-instruct/outputs_Bielik-7B-Instruct-v0.1.json
hash: md5
md5: 58f1b7a5d06cca3989c8b373c5429162
size: 2033178
[email protected]:
cmd: PYTHONPATH=. python scripts/sft/fine_tune_unsloth.py model=Unsloth-Mistral-7B-Instruct-v0.3
deps:
- path: configs/fine_tuning.yaml
hash: md5
md5: 9cd6fd320530e1c8ded7d9c369b8a082
size: 440
- path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml
hash: md5
md5: 71dbbb0a8a2454c7c0210e2d1acd859d
size: 167
- path: scripts/sft/fine_tune_unsloth.py
hash: md5
md5: c8a06fdcb01188a621b5fc9cc579ea56
size: 6904
outs:
- path: data/experiments/fine-tune/Unsloth-Mistral-7B-Instruct-v0.3/pl-court-instruct
hash: md5
md5: 914a39b11765124b6548bfa3f5ef64e1.dir
size: 4084044746
nfiles: 192
[email protected]:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/outputs_Bielik-7B-Instruct-v0.1.json
deps:
- path: data/experiments/predict/pl-court-instruct/outputs_Bielik-7B-Instruct-v0.1.json
hash: md5
md5: 58f1b7a5d06cca3989c8b373c5429162
size: 2033178
- path: scripts/sft/evaluate.py
hash: md5
md5: 5ee442a9a3525af7596bf24c3d724a1d
size: 570
outs:
- path: data/experiments/predict/pl-court-instruct/metrics_Bielik-7B-Instruct-v0.1.json
hash: md5
md5: 2d1b6a392152f2e022a33553265e141a
size: 306
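The [email protected] stage above is what produced the checkpoint-1500 adapter referenced in the fine-tuned model config. A rough, hedged sketch of such an Unsloth LoRA SFT run; the hyperparameters, dataset, and text field below are placeholders, and the real scripts/sft/fine_tune_unsloth.py will differ:

```python
# Hedged sketch of an Unsloth + TRL LoRA fine-tune, not the repository's script.
from unsloth import FastLanguageModel
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    max_seq_length=20_000,
    load_in_4bit=True,
)
model = FastLanguageModel.get_peft_model(
    model, r=16, lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

# Placeholder dataset: one formatted instruction + ruling + target per row.
train_dataset = Dataset.from_dict({"text": ["<instruction> <ruling text> <answer>"]})

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=20_000,
    args=TrainingArguments(
        output_dir="data/experiments/fine-tune/Unsloth-Mistral-7B-Instruct-v0.3/pl-court-instruct",
        per_device_train_batch_size=1,
        max_steps=1500,   # the checkpoint-1500 used for prediction above
        save_steps=500,   # placeholder save cadence
    ),
)
trainer.train()
```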
9 changes: 6 additions & 3 deletions dvc.yaml
@@ -58,8 +58,11 @@ stages:
- Meta-Llama-3-8B-Instruct
- Mistral-7B-Instruct-v0.2
- Mistral-7B-Instruct-v0.2-fine-tuned
- Bielik-7B-Instruct-v0.1
- Unsloth-Llama-3-8B-Instruct
- Unsloth-Llama-3-8B-Instruct-fine-tuned
- Unsloth-Mistral-7B-Instruct-v0.3
- Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned
cmd: >-
PYTHONPATH=. python scripts/sft/predict.py model=${item.model}
deps:
@@ -72,11 +72,11 @@
evaluate:
matrix:
model:
- Meta-Llama-3-8B-Instruct
- Mistral-7B-Instruct-v0.2
- Mistral-7B-Instruct-v0.2-fine-tuned
- Unsloth-Llama-3-8B-Instruct
- Unsloth-Llama-3-8B-Instruct-fine-tuned
- Unsloth-Mistral-7B-Instruct-v0.3
- Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned
- Bielik-7B-Instruct-v0.1
cmd: >-
PYTHONPATH=. python scripts/sft/evaluate.py
--output-file data/experiments/predict/pl-court-instruct/outputs_${item.model}.json
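With these matrix entries in place, DVC expands each model into its own addressable stage (for example [email protected]), matching the new dvc.lock entries above. A minimal sketch of reproducing the newly added stages from Python; running `dvc repro` from the shell does the same:

```python
# Illustration only: invoke the DVC CLI for the new matrix entries.
import subprocess

new_models = [
    "Bielik-7B-Instruct-v0.1",
    "Unsloth-Llama-3-8B-Instruct",
    "Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned",
]
for model in new_models:
    # Reproducing evaluate@<model> also runs its predict@<model> dependency when stale.
    subprocess.run(["dvc", "repro", f"evaluate@{model}"], check=True)
```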