Skip to content

Commit

Permalink
Add results for gpt-4o and gpt-4o-mini
Browse files Browse the repository at this point in the history
  • Loading branch information
binkjakub committed Aug 25, 2024
1 parent 2aab5d0 commit 28b2e77
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
| llm | full_text_chrf | court_name | date | department_name | judges | legal_bases | recorder | signature |
|:----------------------------------------------|:-----------------|:----------------|:----------------|:------------------|:----------------|:----------------|:----------------|:----------------|
| Unsloth-Llama-3-8B-Instruct | 0.578 (± 0.000) | 0.865 (± 0.000) | 0.947 (± 0.001) | 0.889 (± 0.032) | 0.905 (± 0.014) | 0.323 (± 0.052) | 0.741 (± 0.002) | 0.672 (± 0.026) |
| Unsloth-Llama-3-8B-Instruct | 0.579 (± 0.001) | 0.865 (± 0.000) | 0.948 (± 0.001) | 0.882 (± 0.026) | 0.902 (± 0.011) | 0.312 (± 0.042) | 0.741 (± 0.002) | 0.665 (± 0.022) |
| Unsloth-Llama-3-8B-Instruct-fine-tuned | 0.747 (± 0.000) | 0.916 (± 0.001) | 0.920 (± 0.002) | 0.902 (± 0.000) | 0.906 (± 0.001) | 0.442 (± 0.001) | 0.812 (± 0.003) | 0.805 (± 0.004) |
| Unsloth-Mistral-7B-Instruct-v0.3 | 0.574 (± 0.001) | 0.397 (± 0.005) | 0.470 (± 0.004) | 0.404 (± 0.005) | 0.424 (± 0.003) | 0.159 (± 0.002) | 0.436 (± 0.003) | 0.159 (± 0.001) |
| Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned | 0.634 (± 0.001) | 0.547 (± 0.003) | 0.549 (± 0.003) | 0.543 (± 0.003) | 0.544 (± 0.003) | 0.366 (± 0.002) | 0.534 (± 0.002) | 0.533 (± 0.001) |
| Unsloth-Mistral-Nemo-Instruct-2407 | 0.520 (± 0.001) | 0.732 (± 0.006) | 0.759 (± 0.005) | 0.687 (± 0.006) | 0.619 (± 0.006) | 0.267 (± 0.002) | 0.690 (± 0.008) | 0.600 (± 0.004) |
| Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned | 0.610 (± 0.000) | 0.782 (± 0.002) | 0.742 (± 0.000) | 0.717 (± 0.002) | 0.717 (± 0.001) | 0.368 (± 0.000) | 0.696 (± 0.003) | 0.650 (± 0.003) |
| open_ai_gpt-4o | 0.651 (± nan) | 0.955 (± nan) | 0.986 (± nan) | 0.971 (± nan) | 0.917 (± nan) | 0.502 (± nan) | 0.834 (± nan) | 0.990 (± nan) |
| open_ai_gpt-4o-mini | 0.646 (± nan) | 0.953 (± nan) | 0.986 (± nan) | 0.976 (± nan) | 0.927 (± nan) | 0.534 (± nan) | 0.969 (± nan) | 0.988 (± nan) |
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
/outputs_997.json
/metrics_997.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/outputs_997.json
/metrics_997.json
78 changes: 78 additions & 0 deletions dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1561,3 +1561,81 @@ stages:
hash: md5
md5: 839c911f542cd7c60c9ae52ef95e9907
size: 1812429
evaluate@open_ai_gpt-4o-mini-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json
deps:
- path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json
hash: md5
md5: 839c911f542cd7c60c9ae52ef95e9907
size: 1812429
- path: scripts/sft/evaluate.py
hash: md5
md5: 73aa4a7eb8a035c087702457b9401654
size: 636
outs:
- path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/metrics_997.json
hash: md5
md5: fe43f0d25b500a0f2fb2d8199b8034fd
size: 305
predict@Bielik-7B-Instruct-v0.1-42:
cmd: PYTHONPATH=. python scripts/sft/predict.py model=Bielik-7B-Instruct-v0.1
random_seed=42
output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json
deps:
- path: configs/model/Bielik-7B-Instruct-v0.1.yaml
hash: md5
md5: c3412525e9819b53fbad06363a07a871
size: 173
- path: configs/predict.yaml
hash: md5
md5: 5fc8b9ac571d4a2209d7d866697252ab
size: 402
- path: scripts/sft/predict.py
hash: md5
md5: f9acd63cd4d682ae2242d7b51f0d974b
size: 3198
outs:
- path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json
hash: md5
md5: 2dc39513a04910c5d0c54380166639d9
size: 2029644
predict@Bielik-7B-Instruct-v0.1-7312:
cmd: PYTHONPATH=. python scripts/sft/predict.py model=Bielik-7B-Instruct-v0.1
random_seed=7312
output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json
deps:
- path: configs/model/Bielik-7B-Instruct-v0.1.yaml
hash: md5
md5: c3412525e9819b53fbad06363a07a871
size: 173
- path: configs/predict.yaml
hash: md5
md5: 5fc8b9ac571d4a2209d7d866697252ab
size: 402
- path: scripts/sft/predict.py
hash: md5
md5: f9acd63cd4d682ae2242d7b51f0d974b
size: 3198
outs:
- path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json
hash: md5
md5: ae39bf31296ffe82c0f6a3e8c9ff63aa
size: 2014399
evaluate@open_ai_gpt-4o-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json
deps:
- path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json
hash: md5
md5: 7c5833fdd1419163b286baaa3d71e084
size: 1965252
- path: scripts/sft/evaluate.py
hash: md5
md5: 73aa4a7eb8a035c087702457b9401654
size: 636
outs:
- path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/metrics_997.json
hash: md5
md5: 65c808d4aebd8efe37b94a5128a19de6
size: 306
3 changes: 3 additions & 0 deletions dvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ stages:
predict_with_api:
matrix:
model:
- gpt-4o
- gpt-4o-mini
seed:
- 997
Expand All @@ -146,6 +147,8 @@ stages:
- Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned
- Unsloth-Mistral-Nemo-Instruct-2407
- Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned
- open_ai_gpt-4o
- open_ai_gpt-4o-mini
seed: ${seeds}
cmd: >-
PYTHONPATH=. python scripts/sft/evaluate.py
Expand Down

0 comments on commit 28b2e77

Please sign in to comment.