diff --git a/configs/model/Bielik-11B-v2.2-Instruct-fine-tuned.yaml b/configs/model/Bielik-11B-v2.2-Instruct-fine-tuned.yaml new file mode 100644 index 0000000..4e29955 --- /dev/null +++ b/configs/model/Bielik-11B-v2.2-Instruct-fine-tuned.yaml @@ -0,0 +1,11 @@ +name: speakleash/Bielik-11B-v2.2-Instruct +tokenizer_name: ${.name} + +adapter_path: data/experiments/fine-tune/Bielik-11B-v2.2-Instruct/pl-court-instruct/checkpoint-1500 + +max_seq_length: 7_900 +batch_size: 1 +padding: longest +use_4bit: true + +use_unsloth: true diff --git a/data/experiments/fine-tune/Bielik-11B-v2.2-Instruct/.gitignore b/data/experiments/fine-tune/Bielik-11B-v2.2-Instruct/.gitignore new file mode 100644 index 0000000..c5110ed --- /dev/null +++ b/data/experiments/fine-tune/Bielik-11B-v2.2-Instruct/.gitignore @@ -0,0 +1 @@ +/pl-court-instruct diff --git a/data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/.gitignore b/data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/.gitignore new file mode 100644 index 0000000..a29b11b --- /dev/null +++ b/data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/.gitignore @@ -0,0 +1,9 @@ +/outputs_42.json +/outputs_7312.json +/outputs_997.json +/metrics_997.json +/metrics_42.json +/metrics_7312.json +/judge_metrics_7312.json +/judge_metrics_42.json +/judge_metrics_997.json diff --git a/data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/.gitignore b/data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/.gitignore index 3e07b1d..4280e22 100644 --- a/data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/.gitignore +++ b/data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/.gitignore @@ -1 +1,9 @@ /outputs_997.json +/outputs_42.json +/outputs_7312.json +/metrics_997.json +/metrics_42.json +/metrics_7312.json +/judge_metrics_42.json +/judge_metrics_7312.json +/judge_metrics_997.json diff --git a/data/experiments/predict/pl-court-instruct/metrics_judge_summary.md b/data/experiments/predict/pl-court-instruct/metrics_judge_summary.md index 8afa231..bde8d3f 100644 --- a/data/experiments/predict/pl-court-instruct/metrics_judge_summary.md +++ b/data/experiments/predict/pl-court-instruct/metrics_judge_summary.md @@ -1,5 +1,17 @@ | llm | assessment | court_name | date | department_name | judges | legal_bases | recorder | signature | |:----------------------------------------------|:----------------|:----------------|:----------------|:------------------|:----------------|:----------------|:----------------|:----------------| +| Bielik-11B-v2.2-Instruct | (Correct) | 0.868 (± 0.003) | 0.914 (± 0.003) | 0.833 (± 0.003) | 0.514 (± 0.004) | 0.024 (± 0.001) | 0.829 (± 0.001) | 0.837 (± 0.001) | +| Bielik-11B-v2.2-Instruct | (Disagreement) | 0.037 (± 0.001) | 0.023 (± 0.000) | 0.067 (± 0.002) | 0.160 (± 0.001) | 0.599 (± 0.002) | 0.005 (± 0.001) | 0.018 (± 0.001) | +| Bielik-11B-v2.2-Instruct | (Subset) | 0.012 (± 0.001) | 0.000 (± 0.000) | 0.019 (± 0.001) | 0.020 (± 0.000) | 0.060 (± 0.000) | 0.041 (± 0.001) | 0.004 (± 0.001) | +| Bielik-11B-v2.2-Instruct | (Superset) | 0.020 (± 0.001) | 0.000 (± 0.000) | 0.017 (± 0.001) | 0.242 (± 0.002) | 0.154 (± 0.002) | 0.002 (± 0.001) | 0.007 (± 0.000) | +| Bielik-11B-v2.2-Instruct | (empty-answer) | 0.064 (± 0.003) | 0.064 (± 0.003) | 0.064 (± 0.003) | 0.065 (± 0.003) | 0.163 (± 0.004) | 0.124 (± 0.001) | 0.134 (± 0.002) | +| Bielik-11B-v2.2-Instruct | (non-evaluable) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | +| Bielik-11B-v2.2-Instruct-fine-tuned | (Correct) | 0.859 (± 0.002) | 0.847 (± 0.001) | 0.848 (± 0.001) | 0.824 (± 0.003) | 0.066 (± 0.003) | 0.647 (± 0.011) | 0.529 (± 0.007) | +| Bielik-11B-v2.2-Instruct-fine-tuned | (Disagreement) | 0.009 (± 0.000) | 0.022 (± 0.001) | 0.009 (± 0.000) | 0.014 (± 0.001) | 0.544 (± 0.002) | 0.044 (± 0.002) | 0.059 (± 0.007) | +| Bielik-11B-v2.2-Instruct-fine-tuned | (Subset) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.006 (± 0.000) | 0.011 (± 0.001) | 0.010 (± 0.001) | 0.053 (± 0.003) | 0.038 (± 0.006) | +| Bielik-11B-v2.2-Instruct-fine-tuned | (Superset) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.006 (± 0.000) | 0.020 (± 0.001) | 0.164 (± 0.004) | 0.001 (± 0.001) | 0.001 (± 0.000) | +| Bielik-11B-v2.2-Instruct-fine-tuned | (empty-answer) | 0.132 (± 0.002) | 0.132 (± 0.002) | 0.132 (± 0.002) | 0.132 (± 0.002) | 0.217 (± 0.002) | 0.255 (± 0.012) | 0.373 (± 0.013) | +| Bielik-11B-v2.2-Instruct-fine-tuned | (non-evaluable) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | | Bielik-7B-Instruct-v0.1 | (Correct) | 0.000 (± 0.000) | 0.001 (± 0.001) | 0.000 (± 0.000) | 0.001 (± 0.001) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | | Bielik-7B-Instruct-v0.1 | (Disagreement) | 0.000 (± 0.000) | 0.001 (± 0.000) | 0.001 (± 0.000) | 0.002 (± 0.002) | 0.002 (± 0.001) | 0.001 (± 0.001) | 0.001 (± 0.000) | | Bielik-7B-Instruct-v0.1 | (Subset) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | diff --git a/data/experiments/predict/pl-court-instruct/metrics_ngram_summary.md b/data/experiments/predict/pl-court-instruct/metrics_ngram_summary.md index 0c4d7b1..84bcdb7 100644 --- a/data/experiments/predict/pl-court-instruct/metrics_ngram_summary.md +++ b/data/experiments/predict/pl-court-instruct/metrics_ngram_summary.md @@ -1,5 +1,7 @@ | llm | full_text_chrf | court_name | date | department_name | judges | legal_bases | recorder | signature | |:----------------------------------------------|:-----------------|:----------------|:----------------|:------------------|:----------------|:----------------|:----------------|:----------------| +| Bielik-11B-v2.2-Instruct | 0.679 (± 0.001) | 0.891 (± 0.002) | 0.921 (± 0.002) | 0.902 (± 0.003) | 0.858 (± 0.003) | 0.472 (± 0.001) | 0.842 (± 0.001) | 0.790 (± 0.002) | +| Bielik-11B-v2.2-Instruct-fine-tuned | 0.749 (± 0.001) | 0.865 (± 0.001) | 0.856 (± 0.001) | 0.864 (± 0.001) | 0.848 (± 0.002) | 0.548 (± 0.000) | 0.695 (± 0.011) | 0.589 (± 0.010) | | Bielik-7B-Instruct-v0.1 | 0.354 (± 0.001) | 0.000 (± 0.000) | 0.001 (± 0.000) | 0.001 (± 0.000) | 0.001 (± 0.000) | 0.001 (± 0.000) | 0.000 (± 0.000) | 0.000 (± 0.000) | | Bielik-7B-Instruct-v0.1-fine-tuned | 0.717 (± 0.000) | 0.890 (± 0.007) | 0.863 (± 0.007) | 0.886 (± 0.007) | 0.879 (± 0.007) | 0.465 (± 0.004) | 0.639 (± 0.001) | 0.459 (± 0.002) | | Unsloth-Llama-3-8B-Instruct | 0.579 (± 0.001) | 0.863 (± 0.002) | 0.946 (± 0.002) | 0.909 (± 0.002) | 0.912 (± 0.003) | 0.362 (± 0.002) | 0.735 (± 0.004) | 0.686 (± 0.004) | diff --git a/dvc.lock b/dvc.lock index ad89cf6..7c5fb51 100644 --- a/dvc.lock +++ b/dvc.lock @@ -4,5417 +4,3026 @@ stages: cmd: PYTHONPATH=. python scripts/embed/aggregate_embeddings.py --embeddings-dir data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings deps: - - path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings - hash: md5 + - hash: md5 md5: 1a086db46b90b0f3c4c66c3ecefe8adb.dir - size: 24415235644 nfiles: 53 - - path: scripts/embed/aggregate_embeddings.py - hash: md5 + path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings + size: 24415235644 + - hash: md5 md5: edb817e03c0c1c20822eda0e445f5083 + path: scripts/embed/aggregate_embeddings.py size: 1839 outs: - - path: data/embeddings/pl-court-raw/mmlw-roberta-large/agg_embeddings.pt - hash: md5 + - hash: md5 md5: 0d84b4da5513feeb6ca9bad70a2ff164 + path: data/embeddings/pl-court-raw/mmlw-roberta-large/agg_embeddings.pt size: 1725566207 build_graph_dataset: cmd: PYTHONPATH=. python scripts/dataset/build_graph_dataset.py --dataset-dir data/datasets/pl/raw --embeddings-root-dir data/embeddings/pl-court-raw/mmlw-roberta-large/ --target-dir data/datasets/pl/graph deps: - - path: data/datasets/pl/raw - hash: md5 + - hash: md5 md5: 5dd44be2eea852bcce3d0918ff8b97da.dir - size: 10234880729 nfiles: 17 - - path: data/embeddings/pl-court-raw/mmlw-roberta-large/agg_embeddings.pt - hash: md5 + path: data/datasets/pl/raw + size: 10234880729 + - hash: md5 md5: 0d84b4da5513feeb6ca9bad70a2ff164 + path: data/embeddings/pl-court-raw/mmlw-roberta-large/agg_embeddings.pt size: 1725566207 - - path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings/config.yaml - hash: md5 + - hash: md5 md5: fbb5585b8c3ef28255801d38c9248f8e + path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings/config.yaml size: 502 - - path: juddges/data/pl_court_graph.py - hash: md5 + - hash: md5 md5: 730e3d92be26408bd6dc26606b4c22ff + path: juddges/data/pl_court_graph.py size: 4974 - - path: scripts/dataset/build_graph_dataset.py - hash: md5 + - hash: md5 md5: e7f76dc4f24d884291e1f0b66d8244a8 + path: scripts/dataset/build_graph_dataset.py size: 1159 outs: - - path: data/datasets/pl/graph/data - hash: md5 + - hash: md5 md5: f2820796cff4578c11ffcb0fa6cdadd7.dir - size: 1823760294 nfiles: 2 - - path: data/datasets/pl/graph/metadata.yaml - hash: md5 + path: data/datasets/pl/graph/data + size: 1823760294 + - hash: md5 md5: 68b09dd0ce741e6ee1fff4e37c954fa6 + path: data/datasets/pl/graph/metadata.yaml size: 564 - build_instruct_dataset: - cmd: PYTHONPATH=. python scripts/dataset/build_instruct_dataset.py --dataset-dir - data/datasets/pl/raw --repo-id JuDDGES/pl-court-instruct + build_instruct_dataset_en: + cmd: PYTHONPATH=. python scripts/dataset/build_instruct_dataset_en.py --repo-id + JuDDGES/en-court-instruct deps: - - path: data/datasets/pl/raw - hash: md5 - md5: 5dd44be2eea852bcce3d0918ff8b97da.dir - size: 10234880729 - nfiles: 17 - - path: scripts/dataset/build_instruct_dataset.py - hash: md5 - md5: 5038c49e847d847ea3fd05903624d5c9 - size: 5696 + - hash: md5 + md5: 39e530fbd8c7f3a696e117ee13578e1f + path: scripts/dataset/build_instruct_dataset_en.py + size: 5203 embed@mmlw-roberta-large: cmd: PYTHONPATH=. python scripts/embed/embed_text.py embedding_model=mmlw-roberta-large deps: - - path: configs/embedding.yaml - hash: md5 + - hash: md5 md5: 22fa56f7d7d5a1c1372a8a8b57b02ba8 + path: configs/embedding.yaml size: 467 - - path: configs/embedding_model/mmlw-roberta-large.yaml - hash: md5 + - hash: md5 md5: 22f36cfd196c0fdc3cfd8a036d52b606 + path: configs/embedding_model/mmlw-roberta-large.yaml size: 52 - - path: data/datasets/pl/raw - hash: md5 + - hash: md5 md5: 5dd44be2eea852bcce3d0918ff8b97da.dir - size: 10234880729 nfiles: 17 - - path: scripts/embed/embed_text.py - hash: md5 + path: data/datasets/pl/raw + size: 10234880729 + - hash: md5 md5: a2953ae4974ef96d62063b5c2711e967 + path: scripts/embed/embed_text.py size: 3549 outs: - - path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings - hash: md5 + - hash: md5 md5: 1a086db46b90b0f3c4c66c3ecefe8adb.dir - size: 24415235644 nfiles: 53 - evaluate@Unsloth-Llama-3-8B-Instruct: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct.json - deps: - - path: - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct.json - hash: md5 - md5: df2f1d464152f87737c8ebb5b0673854 - size: 2179383 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 66211e8b6f056234240f094896966a9c - size: 578 - outs: - - path: - data/experiments/predict/pl-court-instruct/metrics_Unsloth-Llama-3-8B-Instruct.json - hash: md5 - md5: 521a731cc2c45d3eda0656a8e69d505b - size: 307 - evaluate@Unsloth-Llama-3-8B-Instruct-fine-tuned: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct-fine-tuned.json - deps: - - path: - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct-fine-tuned.json - hash: md5 - md5: 9199da7e04fb35cc1ce2bbe9dd5cd274 - size: 1891254 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 66211e8b6f056234240f094896966a9c - size: 578 - outs: - - path: - data/experiments/predict/pl-court-instruct/metrics_Unsloth-Llama-3-8B-Instruct-fine-tuned.json - hash: md5 - md5: 6a0eb30a14687342bc86ae80253cd60c - size: 306 - evaluate@Unsloth-Mistral-7B-Instruct-v0.3: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3.json - deps: - - path: - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3.json - hash: md5 - md5: c2e03f3fbd29c744023bdac7e1007265 - size: 2007040 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 66211e8b6f056234240f094896966a9c - size: 578 - outs: - - path: - data/experiments/predict/pl-court-instruct/metrics_Unsloth-Mistral-7B-Instruct-v0.3.json - hash: md5 - md5: 091b8888275600052dd2dcdd36a55588 - size: 305 - evaluate@Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json - deps: - - path: - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json - hash: md5 - md5: a4fda5774b367e8924cf07f3bf271922 - size: 1834778 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 66211e8b6f056234240f094896966a9c - size: 578 - outs: - - path: - data/experiments/predict/pl-court-instruct/metrics_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json - hash: md5 - md5: 3b3589929112cb2f199044d240e87bcc - size: 305 - instruct_dataset_readme: - cmd: jupyter nbconvert --no-input --to markdown --execute nbs/Data/03_Dataset_Description_Instruct.ipynb - --output-dir data/datasets/pl/readme/instruct --output README + path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings + size: 24415235644 + evaluate_api_models@en-court-instruct-open_ai_gpt-4o-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json + --num-proc=-1 deps: - - path: nbs/Data/03_Dataset_Description_Instruct.ipynb - hash: md5 - md5: 27e6d517445028d45e5c40b22febece4 - size: 16215 + - hash: md5 + md5: 8f70e2baa0b0ae8a320577f5c8a60011 + path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json + size: 679432 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: data/datasets/pl/readme/instruct/ - hash: md5 - md5: de02794df3d74d86f8610f040a17dcbe.dir - size: 144326 - nfiles: 5 - predict@Unsloth-Llama-3-8B-Instruct: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Llama-3-8B-Instruct - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 1b4c0353b8c41fd3656ec5cf15eb6c2b - size: 161 - - path: configs/predict.yaml - hash: md5 - md5: 888667e56c54157be4d75f85657cf478 - size: 494 - - path: scripts/sft/predict.py - hash: md5 - md5: 1dc3e25365c4200d1e26e04b41d6b831 - size: 3188 - outs: - - path: - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct.json - hash: md5 - md5: df2f1d464152f87737c8ebb5b0673854 - size: 2179383 - predict@Unsloth-Llama-3-8B-Instruct-fine-tuned: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Llama-3-8B-Instruct-fine-tuned - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned.yaml - hash: md5 - md5: dd00fc3994bdc95baf1f17de7b026a0f - size: 245 - - path: configs/predict.yaml - hash: md5 - md5: 7422a2c12c7d31d7b68dbe89f02dab5a - size: 532 - - path: scripts/sft/predict.py - hash: md5 - md5: 150d40027312348c19a82ca4f89b4cc6 - size: 2735 - outs: - - path: - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct-fine-tuned.json - hash: md5 - md5: 5c49073109ca97d16501ca74fc568df7 - size: 1742376 - predict@Unsloth-Mistral-7B-Instruct-v0.3: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-7B-Instruct-v0.3 - deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml - hash: md5 - md5: 71dbbb0a8a2454c7c0210e2d1acd859d - size: 167 - - path: configs/predict.yaml - hash: md5 - md5: 888667e56c54157be4d75f85657cf478 - size: 494 - - path: scripts/sft/predict.py - hash: md5 - md5: 1dc3e25365c4200d1e26e04b41d6b831 - size: 3188 - outs: - - path: - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3.json - hash: md5 - md5: c2e03f3fbd29c744023bdac7e1007265 - size: 2007040 - predict@Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned - deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml - hash: md5 - md5: 8e8b380ef9bc65715cb833ce104cda20 - size: 256 - - path: configs/predict.yaml - hash: md5 - md5: 888667e56c54157be4d75f85657cf478 - size: 494 - - path: scripts/sft/predict.py - hash: md5 - md5: 1dc3e25365c4200d1e26e04b41d6b831 - size: 3188 - outs: - - path: - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json - hash: md5 - md5: a4fda5774b367e8924cf07f3bf271922 - size: 1834778 - raw_dataset_readme: - cmd: jupyter nbconvert --no-input --to markdown --execute 'nbs/Dataset Cards/01_Dataset_Description_Raw.ipynb' - --output-dir data/datasets/pl/readme/raw --output README + - hash: md5 + md5: ac30bcf3c40000cab61e0914b56aba85 + path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/metrics_997.json + size: 157 + evaluate_api_models@en-court-instruct-open_ai_gpt-4o-mini-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + --num-proc=-1 deps: - - path: data/datasets/pl/raw - hash: md5 - md5: 622ba21868561c26fb6877ad95bfb5c5.dir - size: 10234505621 - nfiles: 17 - - path: nbs/Dataset Cards/01_Dataset_Description_Raw.ipynb - hash: md5 - md5: 11b39233ef419de713493cb5ec8bcfd9 - size: 77118 + - hash: md5 + md5: 2a0819011b3eac56e497201a9f67e310 + path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + size: 690306 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: data/datasets/pl/readme/raw/ - hash: md5 - md5: c82b8238e3043491c6fa49e9641e8dac.dir - size: 475420 - nfiles: 8 - sft_unsloth@Unsloth-Llama-3-8B-Instruct: - cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py model=Unsloth-Llama-3-8B-Instruct + - hash: md5 + md5: d70eb0821aff9c9e874a421b80f7f697 + path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/metrics_997.json + size: 155 + evaluate_api_models@pl-court-instruct-open_ai_gpt-4o-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json + --num-proc=-1 deps: - - path: configs/fine_tuning.yaml - hash: md5 - md5: 3933c4faf5a478d0f9d3963c3b29e5cc - size: 1356 - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: scripts/sft/fine_tune_llm.py - hash: md5 - md5: 4b77ee1ea604cae18f17ca00cdb6988b - size: 4578 + - hash: md5 + md5: 7c5833fdd1419163b286baaa3d71e084 + path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json + size: 1965252 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: data/experiments/fine-tune/Unsloth-Llama-3-8B-Instruct/pl-court-instruct - hash: md5 - md5: d9850d30d221f257e1453a66a6c1eef3.dir - size: 784320233 - nfiles: 33 - sft_unsloth@Unsloth-Mistral-7B-Instruct-v0.3: - cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py model=Unsloth-Mistral-7B-Instruct-v0.3 + - hash: md5 + md5: 65c808d4aebd8efe37b94a5128a19de6 + path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/metrics_997.json + size: 306 + evaluate_api_models@pl-court-instruct-open_ai_gpt-4o-mini-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + --num-proc=-1 deps: - - path: configs/fine_tuning.yaml - hash: md5 - md5: 3933c4faf5a478d0f9d3963c3b29e5cc - size: 1356 - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml - hash: md5 - md5: d184e20107315876e7751bdc7c3841ad - size: 182 - - path: scripts/sft/fine_tune_llm.py - hash: md5 - md5: 4b77ee1ea604cae18f17ca00cdb6988b - size: 4578 + - hash: md5 + md5: 839c911f542cd7c60c9ae52ef95e9907 + path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + size: 1812429 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: data/experiments/fine-tune/Unsloth-Mistral-7B-Instruct-v0.3/pl-court-instruct - hash: md5 - md5: 1b47e8203c533942e1903dd816f7a7f7.dir - size: 1518954466 - nfiles: 66 - summarize_metrics@data/experiments/predict/pl-court-instruct: - cmd: PYTHONPATH=. python scripts/sft/summarize_metrics.py --root-dir data/experiments/predict/pl-court-instruct + - hash: md5 + md5: fe43f0d25b500a0f2fb2d8199b8034fd + path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/metrics_997.json + size: 305 + evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + --num-proc=-1 deps: - - path: scripts/sft/summarize_metrics.py - hash: md5 - md5: eb5736f5709f9773acf21bfc28c2e012 - size: 2975 + - hash: md5 + md5: 761018c0a306fbee63dad2fbc119110d + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + size: 821683 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: data/experiments/predict/pl-court-instruct/metrics_judge_summary.md - hash: md5 - md5: e5fdc8ce94d75886ec2a2b291b2b63c5 - size: 12557 - - path: data/experiments/predict/pl-court-instruct/metrics_ngram_summary.md - hash: md5 - md5: e33a4543e486ee107e787250da19b7fa - size: 2853 - evaluate_llm_as_judge@Unsloth-Llama-3-8B-Instruct-Unsloth-Llama-3-8B-Instruct: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py model=Unsloth-Llama-3-8B-Instruct - answers_file=data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct.json - out_metric_file=data/experiments/llm_as_judge/pl-court-instruct/judge_Unsloth-Llama-3-8B-Instruct_metrics_Unsloth-Llama-3-8B-Instruct.json - out_predictions_file=data/experiments/llm_as_judge/pl-court-instruct/judge_Unsloth-Llama-3-8B-Instruct_predictions_Unsloth-Llama-3-8B-Instruct.json - deps: - - path: - data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct.json - hash: md5 - md5: df2f1d464152f87737c8ebb5b0673854 - size: 2179383 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 55ffa83e2778e921bdfc677889e45a23 - size: 3676 - outs: - - path: - data/experiments/llm_as_judge/pl-court-instruct/judge_Unsloth-Llama-3-8B-Instruct_predictions_Unsloth-Llama-3-8B-Instruct.json - hash: md5 - md5: d0be277f3585e4d71d9551cd96851183 - size: 54800 - predict@Unsloth-Mistral-7B-Instruct-v0.3-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-7B-Instruct-v0.3 - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json - deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml - hash: md5 - md5: d184e20107315876e7751bdc7c3841ad - size: 182 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 265776ba10a7b24b66e6bac1131e0c48 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_42.json + size: 149 + evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + --num-proc=-1 + deps: + - hash: md5 + md5: a7361535b440251d6ce6232a15cfcdf2 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + size: 818877 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json - hash: md5 - md5: bbb883aa388b274bef3e9296df26f68f - size: 1795752 - predict@Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json + - hash: md5 + md5: 97fa8dfaa5e57633e8fb6a7d073177f5 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_7312.json + size: 147 + evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + --num-proc=-1 deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml - hash: md5 - md5: 828382dc03dbed80cff4a3358321dc4a - size: 271 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 94924275d576271875fecf22c0f9b39e + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + size: 817490 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json - hash: md5 - md5: 3d336675e54a706fae45349adbaf6ee4 - size: 1793461 - predict@Unsloth-Mistral-7B-Instruct-v0.3-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-7B-Instruct-v0.3 - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json + - hash: md5 + md5: c3552161ec68d8cc6a8e5b75f02e22e2 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_997.json + size: 147 + evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + --num-proc=-1 deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml - hash: md5 - md5: d184e20107315876e7751bdc7c3841ad - size: 182 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 4246a4fafba5e130aac3db6c1c61ce30 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + size: 675578 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json - hash: md5 - md5: 4a9d3a2bb1dd47a732bd2df8102bc93f - size: 1799957 - predict@Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json + - hash: md5 + md5: 016d1c87b2925c6f941400d178bee018 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_42.json + size: 157 + evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json + --num-proc=-1 deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml - hash: md5 - md5: 828382dc03dbed80cff4a3358321dc4a - size: 271 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: f0b806eebca2f3ddf49d0ff821856b45 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json + size: 670935 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json - hash: md5 - md5: 55d682fba1c08c68552e98be6b503b4e - size: 1790731 - predict@Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json + - hash: md5 + md5: a8459393feb773fea85ede4b831b3fa6 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_7312.json + size: 157 + evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json + --num-proc=-1 deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml - hash: md5 - md5: 828382dc03dbed80cff4a3358321dc4a - size: 271 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 4e968cac351ad48ad786d1ecccbbc967 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json + size: 670674 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json - hash: md5 - md5: 156091297490d893f9815d2ffcf17cbf - size: 1792160 - predict@Unsloth-Mistral-7B-Instruct-v0.3-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-7B-Instruct-v0.3 - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json + - hash: md5 + md5: 21bc79aad7ab2e97b75e1d3fb18a2263 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_997.json + size: 157 + evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + --num-proc=-1 deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml - hash: md5 - md5: d184e20107315876e7751bdc7c3841ad - size: 182 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 4fe25ad80a20ea5d6200136176b3e4ca + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + size: 705218 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json - hash: md5 - md5: 77e10dd2ec17e12e171e4bcab1a48e08 - size: 1795629 - evaluate@Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json + - hash: md5 + md5: 0b2f663a1cbc3ef08c363ec8adc53c15 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_42.json + size: 151 + evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json - hash: md5 - md5: 55d682fba1c08c68552e98be6b503b4e - size: 1790731 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/metrics_997.json - hash: md5 - md5: 29bf759169190a4591c2da7de5399b92 - size: 306 - evaluate@Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json + - hash: md5 + md5: cf4fdbf0e26e6c793bdca4edd6e365c0 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + size: 703876 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 + outs: + - hash: md5 + md5: 604b5cee14ec6520b88bafecc962e031 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_7312.json + size: 152 + evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json - hash: md5 - md5: 3d336675e54a706fae45349adbaf6ee4 - size: 1793461 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/metrics_7312.json - hash: md5 - md5: d1d4407aa0d04db49591afede0d5e71c - size: 307 - evaluate@Unsloth-Mistral-7B-Instruct-v0.3-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json + - hash: md5 + md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + size: 705894 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 + outs: + - hash: md5 + md5: a91ec5b434bebd8ce1d2000e0a033cb9 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_997.json + size: 152 + evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json + --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json - hash: md5 - md5: bbb883aa388b274bef3e9296df26f68f - size: 1795752 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/metrics_997.json - hash: md5 - md5: 45085589a6e88e04d4e01ebf5d3e9bcc - size: 310 - evaluate@Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json + - hash: md5 + md5: 313fa5a662f37cacae4980a04830f422 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json + size: 642688 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 + outs: + - hash: md5 + md5: f0d37c5ac017c0e488b7c3bed01c7093 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_42.json + size: 156 + evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json + --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json - hash: md5 - md5: 156091297490d893f9815d2ffcf17cbf - size: 1792160 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/metrics_42.json - hash: md5 - md5: d6661078ff04e0791f7d5dae2e5ed99d - size: 306 - evaluate@Unsloth-Mistral-7B-Instruct-v0.3-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json + - hash: md5 + md5: 4ed8db93aa14f1cc98e276d3989efa9e + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json + size: 642730 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 + outs: + - hash: md5 + md5: a1521ab06a56258759953bb02ae87e24 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_7312.json + size: 157 + evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json + --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json - hash: md5 - md5: 4a9d3a2bb1dd47a732bd2df8102bc93f - size: 1799957 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/metrics_42.json - hash: md5 - md5: c8b1c6a7c1c7b593d7555d38174685b7 - size: 308 - evaluate@Unsloth-Mistral-7B-Instruct-v0.3-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json - hash: md5 - md5: 77e10dd2ec17e12e171e4bcab1a48e08 - size: 1795629 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/metrics_7312.json - hash: md5 - md5: 021edec6c9f831f8e6abe15d9771ac1e - size: 307 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-7B-Instruct-v0.3-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/judge_metrics_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json - hash: md5 - md5: bbb883aa388b274bef3e9296df26f68f - size: 1795752 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/judge_metrics_997.json - hash: md5 - md5: a1fac753d33cf460dbfc64eeb7e1c89b - size: 972 - predict@Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Llama-3-8B-Instruct - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 - md5: e99c88720116c951087b6125e5f4be4d - size: 2008073 - predict@Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Llama-3-8B-Instruct - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 - md5: 4c25368aacb7402b1b2cae9368d187d1 - size: 2013637 - predict@Unsloth-Llama-3-8B-Instruct-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Llama-3-8B-Instruct-fine-tuned - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned.yaml - hash: md5 - md5: b102e42e63586df07d9528d70f802b8f - size: 260 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - hash: md5 - md5: 289b719e8c7166e578417e5706bdc4e3 - size: 1760355 - predict@Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Llama-3-8B-Instruct - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 - md5: baef589507248af212aaae51602fd999 - size: 2010150 - predict@Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Llama-3-8B-Instruct-fine-tuned - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned.yaml - hash: md5 - md5: b102e42e63586df07d9528d70f802b8f - size: 260 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - hash: md5 - md5: 25bee3b4ee09b36d636095b4c927a0d3 - size: 1759194 - predict@Unsloth-Llama-3-8B-Instruct-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Llama-3-8B-Instruct-fine-tuned - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned.yaml - hash: md5 - md5: b102e42e63586df07d9528d70f802b8f - size: 260 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - hash: md5 - md5: 82b2c535d99d91b9a34986375bfa31a9 - size: 1758747 - evaluate@Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 - md5: e99c88720116c951087b6125e5f4be4d - size: 2008073 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_42.json - hash: md5 - md5: 2116481b79c785f94b35852b6e0e4f57 - size: 304 - evaluate@Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 - md5: 4c25368aacb7402b1b2cae9368d187d1 - size: 2013637 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_7312.json - hash: md5 - md5: ae6eebc7a3538e73e6ae213435c3a875 - size: 307 - evaluate@Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 - md5: baef589507248af212aaae51602fd999 - size: 2010150 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_997.json - hash: md5 - md5: b2edea153ccd1c3a0b1e22699330de4d - size: 306 - evaluate@Unsloth-Llama-3-8B-Instruct-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - hash: md5 - md5: 289b719e8c7166e578417e5706bdc4e3 - size: 1760355 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/metrics_42.json - hash: md5 - md5: c27dac02aceaecc8ead3cb49ed2ea22d - size: 306 - evaluate@Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - hash: md5 - md5: 25bee3b4ee09b36d636095b4c927a0d3 - size: 1759194 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/metrics_7312.json - hash: md5 - md5: 59ec16d58c706877808d77e53429bd35 - size: 306 - evaluate@Unsloth-Llama-3-8B-Instruct-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - hash: md5 - md5: 82b2c535d99d91b9a34986375bfa31a9 - size: 1758747 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/metrics_997.json - hash: md5 - md5: b0f204e659b487ff84a736792f4c0344 - size: 302 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/judge_metrics_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json - hash: md5 - md5: 55d682fba1c08c68552e98be6b503b4e - size: 1790731 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/judge_metrics_997.json - hash: md5 - md5: bb73f2ec119c4c16761fcb6feac1d902 - size: 977 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 - md5: e99c88720116c951087b6125e5f4be4d - size: 2008073 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json - hash: md5 - md5: 1fbffe7226913b6fdd354ef9ea980c7f - size: 1177 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 - md5: baef589507248af212aaae51602fd999 - size: 2010150 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json - hash: md5 - md5: 69d98bbed47b4f880ade8adc2987354c - size: 1171 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-7B-Instruct-v0.3-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/judge_metrics_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json - hash: md5 - md5: 4a9d3a2bb1dd47a732bd2df8102bc93f - size: 1799957 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/judge_metrics_42.json - hash: md5 - md5: faee13bd2b0a1006140692559925b722 - size: 995 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/judge_metrics_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json - hash: md5 - md5: 156091297490d893f9815d2ffcf17cbf - size: 1792160 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/judge_metrics_42.json - hash: md5 - md5: 26f88b69c0756f4b2021b2855112e702 - size: 985 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/judge_metrics_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json - hash: md5 - md5: 3d336675e54a706fae45349adbaf6ee4 - size: 1793461 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/judge_metrics_7312.json - hash: md5 - md5: ceb3859ac3ccf62a7c4210f97489ccfe - size: 978 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - hash: md5 - md5: 25bee3b4ee09b36d636095b4c927a0d3 - size: 1759194 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_7312.json - hash: md5 - md5: 339ca8662be51697dd8eb2226f4cb506 - size: 1163 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - hash: md5 - md5: 289b719e8c7166e578417e5706bdc4e3 - size: 1760355 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_42.json - hash: md5 - md5: 1fb63c11961122a7b5cebe2882d9a59b - size: 1166 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - hash: md5 - md5: 82b2c535d99d91b9a34986375bfa31a9 - size: 1758747 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_997.json - hash: md5 - md5: 08426af15d5a278bb2d5bb9a5ded9449 - size: 1168 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-7B-Instruct-v0.3-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/judge_metrics_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json - hash: md5 - md5: 77e10dd2ec17e12e171e4bcab1a48e08 - size: 1795629 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/judge_metrics_7312.json - hash: md5 - md5: f1d2dd11fd7a8f0bac451d50f21ae7f7 - size: 995 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 - md5: 4c25368aacb7402b1b2cae9368d187d1 - size: 2013637 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json - hash: md5 - md5: cc503c0e3b3a10390457ff99e46463d0 - size: 1176 - predict@Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: 924744efce1483e9128579cad7a4454c - size: 1748772 - predict@Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 1385f49966e9db2a88a17f53d0887ad8 - size: 1741944 - predict@Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 4d023797a9053fd7df61f6b1796112e9 - size: 1747404 - evaluate@Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 4d023797a9053fd7df61f6b1796112e9 - size: 1747404 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_997.json - hash: md5 - md5: 122c6d789f343d4e7232d8720b0f577a - size: 303 - evaluate@Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: 924744efce1483e9128579cad7a4454c - size: 1748772 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_7312.json - hash: md5 - md5: 2ea9afa67cf34dc1629a265bce2c3357 - size: 306 - evaluate@Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 1385f49966e9db2a88a17f53d0887ad8 - size: 1741944 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_42.json - hash: md5 - md5: 37165fb96f31997589e95108f6c149a3 - size: 306 - predict@Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned.yaml - hash: md5 - md5: a35f354b805d105999797d19d0aa7bde - size: 352 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - hash: md5 - md5: 14d4613f7d9495f5fb5f2d7b81f402a9 - size: 1825646 - predict@Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned.yaml - hash: md5 - md5: a35f354b805d105999797d19d0aa7bde - size: 352 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - hash: md5 - md5: 41a47dc56efc29b6c2771db68bdacb17 - size: 1822491 - predict@Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned.yaml - hash: md5 - md5: a35f354b805d105999797d19d0aa7bde - size: 352 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - hash: md5 - md5: 302e1dc4f064007e3df88ac1e8acccc5 - size: 1831330 - evaluate@Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - hash: md5 - md5: 14d4613f7d9495f5fb5f2d7b81f402a9 - size: 1825646 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/metrics_42.json - hash: md5 - md5: e43e3fe605787204ba1345dedaefd124 - size: 305 - evaluate@Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - hash: md5 - md5: 302e1dc4f064007e3df88ac1e8acccc5 - size: 1831330 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/metrics_7312.json - hash: md5 - md5: b5d0fa11fe5557bbe1ee9804d5d09cb1 - size: 303 - evaluate@Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - hash: md5 - md5: 41a47dc56efc29b6c2771db68bdacb17 - size: 1822491 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/metrics_997.json - hash: md5 - md5: 403fc36606ced0ab31d34d3d548f948e - size: 303 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - hash: md5 - md5: f143807267139bc29888fffb37474f08 - size: 1733286 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_997.json - hash: md5 - md5: c7aa386de02183e226bc89a99e66e738 - size: 1168 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 4d023797a9053fd7df61f6b1796112e9 - size: 1747404 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json - hash: md5 - md5: 47460b229c988d7d4131556a88f7b8de - size: 1168 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - hash: md5 - md5: 76924fe96291bafebb4259eb39a14ba3 - size: 1730656 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_42.json - hash: md5 - md5: a09671885113a77aa5988f79a599f168 - size: 1174 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - hash: md5 - md5: 4cf7e1890a19d7dd0ccc274862afc2b7 - size: 1718147 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_7312.json - hash: md5 - md5: 053bcc64b1d98b2c8209954aa0cb492e - size: 1169 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 1385f49966e9db2a88a17f53d0887ad8 - size: 1741944 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json - hash: md5 - md5: 59a0ded89c8d51acbb8e96d5de7ac8ab - size: 1169 - evaluate_llm_as_judge@llama_3.1_8b_instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=llama_3.1_8b_instruct - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: 924744efce1483e9128579cad7a4454c - size: 1748772 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 416d51f2597f86e69bed8d510553decd - size: 2049 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json - hash: md5 - md5: 08d3a5a84b968296decf22c50a99b656 - size: 1164 - sft_unsloth@Unsloth-Mistral-Nemo-Instruct-2407: - cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py model=Unsloth-Mistral-Nemo-Instruct-2407 - deps: - - path: configs/fine_tuning.yaml - hash: md5 - md5: 3933c4faf5a478d0f9d3963c3b29e5cc - size: 1356 - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: scripts/sft/fine_tune_llm.py - hash: md5 - md5: 4b77ee1ea604cae18f17ca00cdb6988b - size: 4578 - outs: - - path: data/experiments/fine-tune/Unsloth-Mistral-Nemo-Instruct-2407/pl-court-instruct - hash: md5 - md5: 80bceb56982e9bdb8d4b441bf843014f.dir - size: 1056899473 - nfiles: 33 - predict@Bielik-7B-Instruct-v0.1-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Bielik-7B-Instruct-v0.1 - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - deps: - - path: configs/model/Bielik-7B-Instruct-v0.1.yaml - hash: md5 - md5: c3412525e9819b53fbad06363a07a871 - size: 173 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - hash: md5 - md5: fac04d78ad020b50f79fc7277a037e8e - size: 2016400 - predict_with_api@gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/predict_with_api.py model_version=gpt-4o - seed=997 - output_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - deps: - - path: configs/predict_with_api.yaml - hash: md5 - md5: aff18078742a14c3d8ce2cd74e718d44 - size: 320 - - path: scripts/sft/predict_with_api.py - hash: md5 - md5: 610d32b0036ae6eef4480c5a30f07999 - size: 3987 - outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 7c5833fdd1419163b286baaa3d71e084 - size: 1965252 - predict_with_api@gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/predict_with_api.py model_version=gpt-4o-mini - seed=997 - output_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - deps: - - path: configs/predict_with_api.yaml - hash: md5 - md5: aff18078742a14c3d8ce2cd74e718d44 - size: 320 - - path: scripts/sft/predict_with_api.py - hash: md5 - md5: 610d32b0036ae6eef4480c5a30f07999 - size: 3987 - outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 839c911f542cd7c60c9ae52ef95e9907 - size: 1812429 - evaluate@open_ai_gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - deps: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 839c911f542cd7c60c9ae52ef95e9907 - size: 1812429 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/metrics_997.json - hash: md5 - md5: fe43f0d25b500a0f2fb2d8199b8034fd - size: 305 - predict@Bielik-7B-Instruct-v0.1-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Bielik-7B-Instruct-v0.1 - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - deps: - - path: configs/model/Bielik-7B-Instruct-v0.1.yaml - hash: md5 - md5: c3412525e9819b53fbad06363a07a871 - size: 173 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - hash: md5 - md5: 2dc39513a04910c5d0c54380166639d9 - size: 2029644 - predict@Bielik-7B-Instruct-v0.1-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Bielik-7B-Instruct-v0.1 - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - deps: - - path: configs/model/Bielik-7B-Instruct-v0.1.yaml - hash: md5 - md5: c3412525e9819b53fbad06363a07a871 - size: 173 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - hash: md5 - md5: ae39bf31296ffe82c0f6a3e8c9ff63aa - size: 2014399 - evaluate@open_ai_gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - deps: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 7c5833fdd1419163b286baaa3d71e084 - size: 1965252 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/metrics_997.json - hash: md5 - md5: 65c808d4aebd8efe37b94a5128a19de6 - size: 306 - sft_unsloth@Bielik-7B-Instruct-v0.1: - cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py model=Bielik-7B-Instruct-v0.1 - deps: - - path: configs/fine_tuning.yaml - hash: md5 - md5: 3933c4faf5a478d0f9d3963c3b29e5cc - size: 1356 - - path: configs/model/Bielik-7B-Instruct-v0.1.yaml - hash: md5 - md5: c3412525e9819b53fbad06363a07a871 - size: 173 - - path: scripts/sft/fine_tune_llm.py - hash: md5 - md5: 4b77ee1ea604cae18f17ca00cdb6988b - size: 4578 - outs: - - path: data/experiments/fine-tune/Bielik-7B-Instruct-v0.1/pl-court-instruct - hash: md5 - md5: be61ab5ea1365c1bcf908952bc015ab4.dir - size: 2293711014 - nfiles: 108 - predict@Bielik-7B-Instruct-v0.1-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Bielik-7B-Instruct-v0.1-fine-tuned - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - deps: - - path: configs/model/Bielik-7B-Instruct-v0.1-fine-tuned.yaml - hash: md5 - md5: 2d9590869dfe247d7c6335f3cd7dd0c2 - size: 253 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - hash: md5 - md5: 178eb0649617d4a698da6c9e315e84c5 - size: 2034749 - predict@Bielik-7B-Instruct-v0.1-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Bielik-7B-Instruct-v0.1-fine-tuned - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - deps: - - path: configs/model/Bielik-7B-Instruct-v0.1-fine-tuned.yaml - hash: md5 - md5: 2d9590869dfe247d7c6335f3cd7dd0c2 - size: 253 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - hash: md5 - md5: 743ea22448bc73a7a991da075fca8841 - size: 2031343 - predict@Bielik-7B-Instruct-v0.1-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py model=Bielik-7B-Instruct-v0.1-fine-tuned - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - deps: - - path: configs/model/Bielik-7B-Instruct-v0.1-fine-tuned.yaml - hash: md5 - md5: 2d9590869dfe247d7c6335f3cd7dd0c2 - size: 253 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - hash: md5 - md5: 433a4b2aa7870a134277a265d099a588 - size: 2029482 - evaluate@Bielik-7B-Instruct-v0.1-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - hash: md5 - md5: 2dc39513a04910c5d0c54380166639d9 - size: 2029644 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_42.json - hash: md5 - md5: 2cbca38fd0bbdb4df024f76506eeb26c - size: 307 - evaluate@Bielik-7B-Instruct-v0.1-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - hash: md5 - md5: ae39bf31296ffe82c0f6a3e8c9ff63aa - size: 2014399 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_7312.json - hash: md5 - md5: ad13d47ca88e721be75c79c225e12ee6 - size: 289 - evaluate@Bielik-7B-Instruct-v0.1-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - hash: md5 - md5: fac04d78ad020b50f79fc7277a037e8e - size: 2016400 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_997.json - hash: md5 - md5: 83fb160145ef5e21b43f7c348658ea02 - size: 327 - evaluate@Bielik-7B-Instruct-v0.1-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - hash: md5 - md5: 178eb0649617d4a698da6c9e315e84c5 - size: 2034749 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_42.json - hash: md5 - md5: 99e684c720ca4c4ef6c4276e7d1880ab - size: 305 - evaluate@Bielik-7B-Instruct-v0.1-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - hash: md5 - md5: 743ea22448bc73a7a991da075fca8841 - size: 2031343 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_7312.json - hash: md5 - md5: 07d798079cedf3dc194242d6a1bc3bcd - size: 306 - evaluate@Bielik-7B-Instruct-v0.1-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - hash: md5 - md5: 433a4b2aa7870a134277a265d099a588 - size: 2029482 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_997.json - hash: md5 - md5: adb7c1e239396bbf6e308f3f1b436099 - size: 307 - build_instruct_dataset_en: - cmd: PYTHONPATH=. python scripts/dataset/build_instruct_dataset_en.py --repo-id - JuDDGES/en-court-instruct - deps: - - path: scripts/dataset/build_instruct_dataset_en.py - hash: md5 - md5: 39e530fbd8c7f3a696e117ee13578e1f - size: 5203 - predict_with_api@en-court-instruct-gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/predict_with_api.py dataset=en-court-instruct - model_version=gpt-4o-mini seed=997 - output_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - deps: - - path: configs/predict_with_api.yaml - hash: md5 - md5: aff18078742a14c3d8ce2cd74e718d44 - size: 320 - - path: scripts/sft/predict_with_api.py - hash: md5 - md5: 142508c7b6df391083b0e81a3a6c4795 - size: 3968 - outs: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 2a0819011b3eac56e497201a9f67e310 - size: 690306 - evaluate@en-court-instruct-open_ai_gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - deps: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 2a0819011b3eac56e497201a9f67e310 - size: 690306 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/metrics_997.json - hash: md5 - md5: d70eb0821aff9c9e874a421b80f7f697 - size: 155 - predict@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 - md5: e99c88720116c951087b6125e5f4be4d - size: 2008073 - predict@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 - md5: 4c25368aacb7402b1b2cae9368d187d1 - size: 2013637 - predict@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 - md5: baef589507248af212aaae51602fd999 - size: 2010150 - predict@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned.yaml - hash: md5 - md5: 3906c39a5c516f89ddafb7eff21615cd - size: 275 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - hash: md5 - md5: 289b719e8c7166e578417e5706bdc4e3 - size: 1760355 - predict@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned.yaml - hash: md5 - md5: 3906c39a5c516f89ddafb7eff21615cd - size: 275 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - hash: md5 - md5: 25bee3b4ee09b36d636095b4c927a0d3 - size: 1759194 - predict@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned.yaml - hash: md5 - md5: 3906c39a5c516f89ddafb7eff21615cd - size: 275 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - hash: md5 - md5: 82b2c535d99d91b9a34986375bfa31a9 - size: 1758747 - predict@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3 - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json - deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml - hash: md5 - md5: d184e20107315876e7751bdc7c3841ad - size: 182 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json - hash: md5 - md5: 4a9d3a2bb1dd47a732bd2df8102bc93f - size: 1799957 - predict@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3 - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json - deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml - hash: md5 - md5: d184e20107315876e7751bdc7c3841ad - size: 182 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json - hash: md5 - md5: 77e10dd2ec17e12e171e4bcab1a48e08 - size: 1795629 - predict@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3 - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json - deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml - hash: md5 - md5: d184e20107315876e7751bdc7c3841ad - size: 182 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json - hash: md5 - md5: bbb883aa388b274bef3e9296df26f68f - size: 1795752 - predict@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json - deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml - hash: md5 - md5: 828382dc03dbed80cff4a3358321dc4a - size: 271 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 787c129090aa1b64e337b236a4391402 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json + size: 642477 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json - hash: md5 - md5: 156091297490d893f9815d2ffcf17cbf - size: 1792160 - predict@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json + - hash: md5 + md5: f3339245ea358de4b1348c8393153946 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_997.json + size: 157 + evaluate_llm_as_judge_api_models@en-gpt_4o_mini-open_ai_gpt-4o-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json + out_metric_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o/judge_metrics_997.json + prompt=en deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml - hash: md5 - md5: 828382dc03dbed80cff4a3358321dc4a - size: 271 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 8f70e2baa0b0ae8a320577f5c8a60011 + path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json + size: 679432 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json - hash: md5 - md5: 3d336675e54a706fae45349adbaf6ee4 - size: 1793461 - predict@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json + - hash: md5 + md5: 1ad8736bed0fff4e88a9c32775f370bf + path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/judge_metrics_997.json + size: 481 + evaluate_llm_as_judge_api_models@en-gpt_4o_mini-open_ai_gpt-4o-mini-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + out_metric_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json + prompt=en deps: - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml - hash: md5 - md5: 828382dc03dbed80cff4a3358321dc4a - size: 271 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 2a0819011b3eac56e497201a9f67e310 + path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + size: 690306 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json - hash: md5 - md5: 55d682fba1c08c68552e98be6b503b4e - size: 1790731 - predict@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + - hash: md5 + md5: bd272bea099716c0c2e689a2d19c0071 + path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json + size: 488 + evaluate_llm_as_judge_api_models@pl-gpt_4o_mini-open_ai_gpt-4o-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json + out_metric_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/judge_metrics_997.json + prompt=pl deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 7c5833fdd1419163b286baaa3d71e084 + path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json + size: 1965252 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 1385f49966e9db2a88a17f53d0887ad8 - size: 1741944 - predict@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + - hash: md5 + md5: 867f10aeb55a3bd46b08c8a75c3bfc60 + path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/judge_metrics_997.json + size: 1176 + evaluate_llm_as_judge_api_models@pl-gpt_4o_mini-open_ai_gpt-4o-mini-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + out_metric_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json + prompt=pl deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 839c911f542cd7c60c9ae52ef95e9907 + path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + size: 1812429 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: 924744efce1483e9128579cad7a4454c - size: 1748772 - predict@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + - hash: md5 + md5: 24037233e5abe74fe13f69dd4fc5e26a + path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json + size: 1173 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json + prompt=en deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 761018c0a306fbee63dad2fbc119110d + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + size: 821683 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 4d023797a9053fd7df61f6b1796112e9 - size: 1747404 - predict@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json + - hash: md5 + md5: 77ecbff8c82afbfd6fec098fb87e1218 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json + size: 478 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json + prompt=en deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned.yaml - hash: md5 - md5: 1d9e6407d121214f949d56ca5c3425f5 - size: 367 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: a7361535b440251d6ce6232a15cfcdf2 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + size: 818877 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - hash: md5 - md5: 14d4613f7d9495f5fb5f2d7b81f402a9 - size: 1825646 - predict@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json + - hash: md5 + md5: f25c9ad98ef817e976def98d6b7d3b5d + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json + size: 482 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json + prompt=en deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned.yaml - hash: md5 - md5: 1d9e6407d121214f949d56ca5c3425f5 - size: 367 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 94924275d576271875fecf22c0f9b39e + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + size: 817490 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - hash: md5 - md5: 302e1dc4f064007e3df88ac1e8acccc5 - size: 1831330 - predict@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json + - hash: md5 + md5: 4395c32931d25a1bd9aa092c5a0e5460 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json + size: 478 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_42.json + prompt=en deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned.yaml - hash: md5 - md5: 1d9e6407d121214f949d56ca5c3425f5 - size: 367 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 4246a4fafba5e130aac3db6c1c61ce30 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + size: 675578 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - hash: md5 - md5: 41a47dc56efc29b6c2771db68bdacb17 - size: 1822491 - predict@pl-court-instruct-Bielik-7B-Instruct-v0.1-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1 - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json + - hash: md5 + md5: 5f2cea81c873a3b85ef95ba9a6dc90a5 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_42.json + size: 487 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_7312.json + prompt=en deps: - - path: configs/model/Bielik-7B-Instruct-v0.1.yaml - hash: md5 - md5: c3412525e9819b53fbad06363a07a871 - size: 173 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: f0b806eebca2f3ddf49d0ff821856b45 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json + size: 670935 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - hash: md5 - md5: 2dc39513a04910c5d0c54380166639d9 - size: 2029644 - predict@pl-court-instruct-Bielik-7B-Instruct-v0.1-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1 - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json + - hash: md5 + md5: 5cc45cac8a7607e42a8a394593d33396 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_7312.json + size: 486 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_997.json + prompt=en deps: - - path: configs/model/Bielik-7B-Instruct-v0.1.yaml - hash: md5 - md5: c3412525e9819b53fbad06363a07a871 - size: 173 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 4e968cac351ad48ad786d1ecccbbc967 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json + size: 670674 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - hash: md5 - md5: ae39bf31296ffe82c0f6a3e8c9ff63aa - size: 2014399 - predict@pl-court-instruct-Bielik-7B-Instruct-v0.1-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1 - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json + - hash: md5 + md5: 90c2b0cd132130d0b9d3a60bf6fdd69b + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_997.json + size: 486 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json + prompt=en deps: - - path: configs/model/Bielik-7B-Instruct-v0.1.yaml - hash: md5 - md5: c3412525e9819b53fbad06363a07a871 - size: 173 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 4fe25ad80a20ea5d6200136176b3e4ca + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + size: 705218 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - hash: md5 - md5: fac04d78ad020b50f79fc7277a037e8e - size: 2016400 - predict@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1-fine-tuned - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json + - hash: md5 + md5: 69901f631da4ffefd09e7cbfac39cd89 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json + size: 480 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json + prompt=en deps: - - path: configs/model/Bielik-7B-Instruct-v0.1-fine-tuned.yaml - hash: md5 - md5: 2d9590869dfe247d7c6335f3cd7dd0c2 - size: 253 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: cf4fdbf0e26e6c793bdca4edd6e365c0 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + size: 703876 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - hash: md5 - md5: 178eb0649617d4a698da6c9e315e84c5 - size: 2034749 - predict@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1-fine-tuned - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json + - hash: md5 + md5: 860b5c00ace1f2967db9b5a977cfc3ad + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json + size: 478 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json + prompt=en deps: - - path: configs/model/Bielik-7B-Instruct-v0.1-fine-tuned.yaml - hash: md5 - md5: 2d9590869dfe247d7c6335f3cd7dd0c2 - size: 253 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + size: 705894 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - hash: md5 - md5: 743ea22448bc73a7a991da075fca8841 - size: 2031343 - predict@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1-fine-tuned - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json + - hash: md5 + md5: 860b5c00ace1f2967db9b5a977cfc3ad + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json + size: 478 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_42.json + prompt=en deps: - - path: configs/model/Bielik-7B-Instruct-v0.1-fine-tuned.yaml - hash: md5 - md5: 2d9590869dfe247d7c6335f3cd7dd0c2 - size: 253 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 313fa5a662f37cacae4980a04830f422 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json + size: 642688 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - hash: md5 - md5: 433a4b2aa7870a134277a265d099a588 - size: 2029482 - sft_unsloth@pl-court-instruct-Unsloth-Llama-3-8B-Instruct: - cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=pl-court-instruct - model=Unsloth-Llama-3-8B-Instruct + - hash: md5 + md5: 974e972a09d844a77840029d642e8077 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_42.json + size: 486 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_7312.json + prompt=en deps: - - path: configs/fine_tuning.yaml - hash: md5 - md5: 3933c4faf5a478d0f9d3963c3b29e5cc - size: 1356 - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: scripts/sft/fine_tune_llm.py - hash: md5 - md5: 4b77ee1ea604cae18f17ca00cdb6988b - size: 4578 + - hash: md5 + md5: 4ed8db93aa14f1cc98e276d3989efa9e + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json + size: 642730 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: data/experiments/fine-tune/Unsloth-Llama-3-8B-Instruct/pl-court-instruct/ - hash: md5 - md5: d9850d30d221f257e1453a66a6c1eef3.dir - size: 784320233 - nfiles: 33 - sft_unsloth@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3: - cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=pl-court-instruct - model=Unsloth-Mistral-7B-Instruct-v0.3 + - hash: md5 + md5: 8a9712eb10a8da99d86bab8968fd3207 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_7312.json + size: 485 + evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json + out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_997.json + prompt=en deps: - - path: configs/fine_tuning.yaml - hash: md5 - md5: 3933c4faf5a478d0f9d3963c3b29e5cc - size: 1356 - - path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml - hash: md5 - md5: d184e20107315876e7751bdc7c3841ad - size: 182 - - path: scripts/sft/fine_tune_llm.py - hash: md5 - md5: 4b77ee1ea604cae18f17ca00cdb6988b - size: 4578 + - hash: md5 + md5: 787c129090aa1b64e337b236a4391402 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json + size: 642477 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: data/experiments/fine-tune/Unsloth-Mistral-7B-Instruct-v0.3/pl-court-instruct/ - hash: md5 - md5: 1b47e8203c533942e1903dd816f7a7f7.dir - size: 1518954466 - nfiles: 66 - sft_unsloth@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407: - cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=pl-court-instruct - model=Unsloth-Mistral-Nemo-Instruct-2407 + - hash: md5 + md5: 34de8eabaebe6a96b4b664b664f222e2 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_997.json + size: 484 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_42.json + prompt=pl deps: - - path: configs/fine_tuning.yaml - hash: md5 - md5: 3933c4faf5a478d0f9d3963c3b29e5cc - size: 1356 - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: scripts/sft/fine_tune_llm.py - hash: md5 - md5: 4b77ee1ea604cae18f17ca00cdb6988b - size: 4578 + - hash: md5 + md5: c3e404c898e3e193ac3aa910187b4f9f + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json + size: 1734129 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: data/experiments/fine-tune/Unsloth-Mistral-Nemo-Instruct-2407/pl-court-instruct/ - hash: md5 - md5: 80bceb56982e9bdb8d4b441bf843014f.dir - size: 1056899473 - nfiles: 33 - sft_unsloth@pl-court-instruct-Bielik-7B-Instruct-v0.1: - cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=pl-court-instruct - model=Bielik-7B-Instruct-v0.1 + - hash: md5 + md5: 198f24599357bc230bf9f1e39a235a44 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_42.json + size: 1172 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_7312.json + prompt=pl deps: - - path: configs/fine_tuning.yaml - hash: md5 - md5: 3933c4faf5a478d0f9d3963c3b29e5cc - size: 1356 - - path: configs/model/Bielik-7B-Instruct-v0.1.yaml - hash: md5 - md5: c3412525e9819b53fbad06363a07a871 - size: 173 - - path: scripts/sft/fine_tune_llm.py - hash: md5 - md5: 4b77ee1ea604cae18f17ca00cdb6988b - size: 4578 + - hash: md5 + md5: d4a2ab2393a58f0d7e1897859eccb626 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json + size: 1734772 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: data/experiments/fine-tune/Bielik-7B-Instruct-v0.1/pl-court-instruct/ - hash: md5 - md5: be61ab5ea1365c1bcf908952bc015ab4.dir - size: 2293711014 - nfiles: 108 - predict@en-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct - random_seed=7312 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + - hash: md5 + md5: 81cfdaa675ef2118cf923e57cc54d201 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_7312.json + size: 1161 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_997.json + prompt=pl deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 8f4f6bc97e33b3b2728bebb7620a4968 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json + size: 1731689 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 - md5: a7361535b440251d6ce6232a15cfcdf2 - size: 818877 - predict@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=997 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + - hash: md5 + md5: c5861ffaa439ba9bbd95b954d6ab1f3d + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_997.json + size: 1168 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_42.json + prompt=pl deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: dfd5d7389b312686428cc967aea5a5b9 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json + size: 1860743 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f - size: 705894 - predict@en-court-instruct-Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct - random_seed=997 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + - hash: md5 + md5: abcd5722e84ec3e81ff8cf28b8a887cb + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_42.json + size: 1165 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_7312.json + prompt=pl deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 8fa2faeda5a577c06cd6bf35b8702330 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json + size: 1857569 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 - md5: 94924275d576271875fecf22c0f9b39e - size: 817490 - sft_unsloth@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407: - cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=en-court-instruct - model=Unsloth-Mistral-Nemo-Instruct-2407 + - hash: md5 + md5: 4b77a3d10cd6027e7e141ba80e9678c2 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_7312.json + size: 1160 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_997.json + prompt=pl deps: - - path: configs/fine_tuning.yaml - hash: md5 - md5: 3933c4faf5a478d0f9d3963c3b29e5cc - size: 1356 - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: scripts/sft/fine_tune_llm.py - hash: md5 - md5: 4b77ee1ea604cae18f17ca00cdb6988b - size: 4578 + - hash: md5 + md5: ba53d76f701eddb60a182de49d992878 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json + size: 1857855 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: data/experiments/fine-tune/Unsloth-Mistral-Nemo-Instruct-2407/en-court-instruct/ - hash: md5 - md5: 4c4f973ee0648610fc4b696059fae47a.dir - size: 475726484 - nfiles: 18 - predict@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=7312 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + - hash: md5 + md5: 9e60a1ed6002a0349656c0bd23bc7b1c + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_997.json + size: 1164 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_42.json + prompt=pl deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 2dc39513a04910c5d0c54380166639d9 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json + size: 2029644 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: cf4fdbf0e26e6c793bdca4edd6e365c0 - size: 703876 - predict@en-court-instruct-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct - random_seed=42 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + - hash: md5 + md5: 243da4df07c6dfb5199b925e3f5c07aa + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_42.json + size: 1137 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_7312.json + prompt=pl deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: ae39bf31296ffe82c0f6a3e8c9ff63aa + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json + size: 2014399 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 - md5: 761018c0a306fbee63dad2fbc119110d - size: 821683 - predict@pl-court-instruct-trurl-13B-academic-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=trurl-13B-academic - random_seed=7312 - output_file=data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_7312.json + - hash: md5 + md5: 8098cc937d57455ca47d32c3449159a3 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_7312.json + size: 1129 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_997.json + prompt=pl deps: - - path: configs/model/trurl-13B-academic.yaml - hash: md5 - md5: 3aa3ce4fc9a1958bef82b8dbfd44ab6b - size: 168 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: fac04d78ad020b50f79fc7277a037e8e + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json + size: 2016400 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_7312.json - hash: md5 - md5: bcd41ca4629d4cec2440a8ed2f02560f - size: 1283974 - predict@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=42 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + - hash: md5 + md5: f1390b2d50893a17c90fc277dc363d6a + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_997.json + size: 1139 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_42.json + prompt=pl deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 - md5: ca5ac52e503c9f488f98f569811c76dc - size: 261 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 178eb0649617d4a698da6c9e315e84c5 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json + size: 2034749 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 4fe25ad80a20ea5d6200136176b3e4ca - size: 705218 - predict@pl-court-instruct-trurl-13B-academic-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=trurl-13B-academic - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_997.json + - hash: md5 + md5: 302b957707520fa327d1da0edf18baa3 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_42.json + size: 1167 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_7312.json + prompt=pl deps: - - path: configs/model/trurl-13B-academic.yaml - hash: md5 - md5: 3aa3ce4fc9a1958bef82b8dbfd44ab6b - size: 168 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 743ea22448bc73a7a991da075fca8841 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json + size: 2031343 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_997.json - hash: md5 - md5: 731cff0eb1484682de211336efeff153 - size: 1288941 - predict@pl-court-instruct-trurl-13B-academic-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=trurl-13B-academic - random_seed=42 - output_file=data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_42.json + - hash: md5 + md5: 789f0906846251d3f0cab78d111f9c56 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_7312.json + size: 1163 + evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json + out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_997.json + prompt=pl deps: - - path: configs/model/trurl-13B-academic.yaml - hash: md5 - md5: 3aa3ce4fc9a1958bef82b8dbfd44ab6b - size: 168 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 433a4b2aa7870a134277a265d099a588 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json + size: 2029482 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_42.json - hash: md5 - md5: bb571102170940efc73f02143a530d5b - size: 1289839 - evaluate@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - --num-proc=-1 + - hash: md5 + md5: 90f3ed04ef29c5cd29b7ec8f02a780a1 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_997.json + size: 1163 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 + - hash: md5 md5: e99c88720116c951087b6125e5f4be4d + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json size: 2008073 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_42.json - hash: md5 - md5: 2116481b79c785f94b35852b6e0e4f57 - size: 304 - evaluate@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - --num-proc=-1 + - hash: md5 + md5: 9d9fba0cf2169e9dd9f69579a2182b8e + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json + size: 1172 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 + - hash: md5 md5: 4c25368aacb7402b1b2cae9368d187d1 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json size: 2013637 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_7312.json - hash: md5 - md5: 5e851a38b322caff59de90004eb4a075 - size: 305 - evaluate@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - --num-proc=-1 + - hash: md5 + md5: e58171fc082d33c84497a13dabcf766c + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json + size: 1167 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 + - hash: md5 md5: baef589507248af212aaae51602fd999 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json size: 2010150 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_997.json - hash: md5 - md5: 8c3af9851700f2ff640dd9c8dc92b06d - size: 307 - evaluate@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - --num-proc=-1 + - hash: md5 + md5: f8d16a5298fabe288486822779470cd8 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json + size: 1165 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_42.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - hash: md5 + - hash: md5 md5: 289b719e8c7166e578417e5706bdc4e3 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json size: 1760355 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/metrics_42.json - hash: md5 - md5: 35fc5163dfb37097b814afcc79e91074 - size: 304 - evaluate@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - --num-proc=-1 + - hash: md5 + md5: 70398042d030309e7e0bc7ba927136f3 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_42.json + size: 1167 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_7312.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - hash: md5 + - hash: md5 md5: 25bee3b4ee09b36d636095b4c927a0d3 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json size: 1759194 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/metrics_7312.json - hash: md5 - md5: f7c13c964cc9e225fa794935cbf6515c - size: 305 - evaluate@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - --num-proc=-1 + - hash: md5 + md5: 9d22089c8d23bbc5a028c748e5522c23 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_7312.json + size: 1157 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_997.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - hash: md5 + - hash: md5 md5: 82b2c535d99d91b9a34986375bfa31a9 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json size: 1758747 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/metrics_997.json - hash: md5 - md5: 68f0244a7871bae1e8bd0642a0f2c22e - size: 305 - evaluate@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json - --num-proc=-1 + - hash: md5 + md5: 4222d5b165de8a3a89d71d6519b71b76 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_997.json + size: 1170 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json - hash: md5 - md5: 4a9d3a2bb1dd47a732bd2df8102bc93f - size: 1799957 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 1385f49966e9db2a88a17f53d0887ad8 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + size: 1741944 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/metrics_42.json - hash: md5 - md5: 703e92a1c58aca701b128fd28f4697a4 - size: 306 - evaluate@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json - --num-proc=-1 + - hash: md5 + md5: f4bac633a65afde9bf5612f35c3089bb + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json + size: 1170 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json - hash: md5 - md5: 77e10dd2ec17e12e171e4bcab1a48e08 - size: 1795629 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 924744efce1483e9128579cad7a4454c + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + size: 1748772 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/metrics_7312.json - hash: md5 - md5: 6f3c13385fefb9e38f01a42bb210e3f5 - size: 309 - evaluate@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json - --num-proc=-1 + - hash: md5 + md5: 1f95777ef87a547fa7a41dc597adfc39 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json + size: 1166 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json - hash: md5 - md5: bbb883aa388b274bef3e9296df26f68f - size: 1795752 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 4d023797a9053fd7df61f6b1796112e9 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + size: 1747404 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/metrics_997.json - hash: md5 - md5: 533df4f640eb0699b5382cc759e0a45d - size: 310 - evaluate@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json - --num-proc=-1 + - hash: md5 + md5: de3f557dfdf3440262e4d8f811e526ca + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json + size: 1167 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_42.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json - hash: md5 - md5: 156091297490d893f9815d2ffcf17cbf - size: 1792160 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 14d4613f7d9495f5fb5f2d7b81f402a9 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json + size: 1825646 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/metrics_42.json - hash: md5 - md5: 89338c144457d5297d844c5f9b341f9f - size: 307 - evaluate@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json - --num-proc=-1 + - hash: md5 + md5: e8cff190991ee3164825dbf7eca03d12 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_42.json + size: 1170 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_7312.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json - hash: md5 - md5: 3d336675e54a706fae45349adbaf6ee4 - size: 1793461 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 302e1dc4f064007e3df88ac1e8acccc5 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json + size: 1831330 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/metrics_7312.json - hash: md5 - md5: 8c6ac6f31de90a1fb08d73d08a8544dc - size: 305 - evaluate@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json - --num-proc=-1 + - hash: md5 + md5: aee4a08e0a4d0398b34a2587c039244d + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_7312.json + size: 1169 + evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini + answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json + out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_997.json + prompt=pl deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json - hash: md5 - md5: 55d682fba1c08c68552e98be6b503b4e - size: 1790731 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 41a47dc56efc29b6c2771db68bdacb17 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json + size: 1822491 + - hash: md5 + md5: 79a02fb864cb279f93fc4171043bb31c + path: scripts/sft/evaluate_llm_as_judge.py + size: 2253 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/metrics_997.json - hash: md5 - md5: 9873757edce9412fdee1ef45513f26ac - size: 307 - evaluate@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + - hash: md5 + md5: aac703269b10c85d1a2b5303c22ca077 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_997.json + size: 1168 + evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 1385f49966e9db2a88a17f53d0887ad8 - size: 1741944 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: c3e404c898e3e193ac3aa910187b4f9f + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json + size: 1734129 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_42.json - hash: md5 - md5: 37165fb96f31997589e95108f6c149a3 + - hash: md5 + md5: a75ab0f8f8238ab8c86397dd015fd31d + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/metrics_42.json size: 306 - evaluate@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: 924744efce1483e9128579cad7a4454c - size: 1748772 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: d4a2ab2393a58f0d7e1897859eccb626 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json + size: 1734772 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_7312.json - hash: md5 - md5: 2ea9afa67cf34dc1629a265bce2c3357 - size: 306 - evaluate@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + - hash: md5 + md5: d5861dc30fca8f9bd2d311d924b3905d + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/metrics_7312.json + size: 305 + evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 4d023797a9053fd7df61f6b1796112e9 - size: 1747404 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 8f4f6bc97e33b3b2728bebb7620a4968 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json + size: 1731689 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_997.json - hash: md5 - md5: 122c6d789f343d4e7232d8720b0f577a - size: 303 - evaluate@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json + - hash: md5 + md5: cd6699727392af2d61383b05fa962741 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/metrics_997.json + size: 306 + evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - hash: md5 - md5: 14d4613f7d9495f5fb5f2d7b81f402a9 - size: 1825646 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: dfd5d7389b312686428cc967aea5a5b9 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json + size: 1860743 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/metrics_42.json - hash: md5 - md5: e43e3fe605787204ba1345dedaefd124 - size: 305 - evaluate@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json + - hash: md5 + md5: d1462bb74d1f8790270a5d97c674891c + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/metrics_42.json + size: 304 + evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - hash: md5 - md5: 302e1dc4f064007e3df88ac1e8acccc5 - size: 1831330 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 8fa2faeda5a577c06cd6bf35b8702330 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json + size: 1857569 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/metrics_7312.json - hash: md5 - md5: b5d0fa11fe5557bbe1ee9804d5d09cb1 - size: 303 - evaluate@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json + - hash: md5 + md5: 5edacea1e40b97765c7eaa7b4991ab16 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/metrics_7312.json + size: 306 + evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - hash: md5 - md5: 41a47dc56efc29b6c2771db68bdacb17 - size: 1822491 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: ba53d76f701eddb60a182de49d992878 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json + size: 1857855 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/metrics_997.json - hash: md5 - md5: 403fc36606ced0ab31d34d3d548f948e - size: 303 - evaluate@pl-court-instruct-Bielik-7B-Instruct-v0.1-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json + - hash: md5 + md5: 84fbcf83da746f9e98f70ab22be6f238 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/metrics_997.json + size: 304 + evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - hash: md5 + - hash: md5 md5: 2dc39513a04910c5d0c54380166639d9 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json size: 2029644 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_42.json - hash: md5 + - hash: md5 md5: 2cbca38fd0bbdb4df024f76506eeb26c + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_42.json size: 307 - evaluate@pl-court-instruct-Bielik-7B-Instruct-v0.1-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json + evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - hash: md5 + - hash: md5 md5: ae39bf31296ffe82c0f6a3e8c9ff63aa + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json size: 2014399 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_7312.json - hash: md5 + - hash: md5 md5: ad13d47ca88e721be75c79c225e12ee6 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_7312.json size: 289 - evaluate@pl-court-instruct-Bielik-7B-Instruct-v0.1-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json + evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - hash: md5 + - hash: md5 md5: fac04d78ad020b50f79fc7277a037e8e + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json size: 2016400 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_997.json - hash: md5 + - hash: md5 md5: 83fb160145ef5e21b43f7c348658ea02 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_997.json size: 327 - evaluate@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json + evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - hash: md5 + - hash: md5 md5: 178eb0649617d4a698da6c9e315e84c5 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json size: 2034749 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_42.json - hash: md5 + - hash: md5 md5: 99e684c720ca4c4ef6c4276e7d1880ab + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_42.json size: 305 - evaluate@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json + evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - hash: md5 + - hash: md5 md5: 743ea22448bc73a7a991da075fca8841 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json size: 2031343 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_7312.json - hash: md5 + - hash: md5 md5: 07d798079cedf3dc194242d6a1bc3bcd + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_7312.json size: 306 - evaluate@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json + evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json --num-proc=-1 deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - hash: md5 + - hash: md5 md5: 433a4b2aa7870a134277a265d099a588 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json size: 2029482 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_997.json - hash: md5 + - hash: md5 md5: adb7c1e239396bbf6e308f3f1b436099 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_997.json size: 307 - evaluate@pl-court-instruct-trurl-13B-academic-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_42.json + evaluate_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json --num-proc=-1 deps: - - path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_42.json - hash: md5 - md5: bb571102170940efc73f02143a530d5b - size: 1289839 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: e99c88720116c951087b6125e5f4be4d + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + size: 2008073 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/metrics_42.json - hash: md5 - md5: 5bb8bd6918ec3d91437d3465cbbee127 - size: 311 - evaluate@pl-court-instruct-trurl-13B-academic-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_7312.json + - hash: md5 + md5: 2116481b79c785f94b35852b6e0e4f57 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_42.json + size: 304 + evaluate_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json --num-proc=-1 deps: - - path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_7312.json - hash: md5 - md5: bcd41ca4629d4cec2440a8ed2f02560f - size: 1283974 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 4c25368aacb7402b1b2cae9368d187d1 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + size: 2013637 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/metrics_7312.json - hash: md5 - md5: da990070981d0524aa31916fad80e0eb - size: 313 - evaluate@pl-court-instruct-trurl-13B-academic-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_997.json + - hash: md5 + md5: 5e851a38b322caff59de90004eb4a075 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_7312.json + size: 305 + evaluate_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json --num-proc=-1 deps: - - path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_997.json - hash: md5 - md5: 731cff0eb1484682de211336efeff153 - size: 1288941 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: baef589507248af212aaae51602fd999 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + size: 2010150 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/metrics_997.json - hash: md5 - md5: 4d080092735994614eaf7125e5fe5bf2 - size: 313 - evaluate@en-court-instruct-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 - md5: 761018c0a306fbee63dad2fbc119110d - size: 821683 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_42.json - hash: md5 - md5: 265776ba10a7b24b66e6bac1131e0c48 - size: 149 - evaluate@en-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 - md5: a7361535b440251d6ce6232a15cfcdf2 - size: 818877 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_7312.json - hash: md5 - md5: 97fa8dfaa5e57633e8fb6a7d073177f5 - size: 147 - evaluate@en-court-instruct-Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 - md5: 94924275d576271875fecf22c0f9b39e - size: 817490 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_997.json - hash: md5 - md5: c3552161ec68d8cc6a8e5b75f02e22e2 - size: 147 - evaluate@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 4fe25ad80a20ea5d6200136176b3e4ca - size: 705218 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_42.json - hash: md5 - md5: 0b2f663a1cbc3ef08c363ec8adc53c15 - size: 151 - evaluate@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: cf4fdbf0e26e6c793bdca4edd6e365c0 - size: 703876 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_7312.json - hash: md5 - md5: 604b5cee14ec6520b88bafecc962e031 - size: 152 - evaluate@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f - size: 705894 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_997.json - hash: md5 - md5: a91ec5b434bebd8ce1d2000e0a033cb9 - size: 152 - sft_unsloth@en-court-instruct-Unsloth-Llama-3-8B-Instruct: - cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=en-court-instruct - model=Unsloth-Llama-3-8B-Instruct - deps: - - path: configs/fine_tuning.yaml - hash: md5 - md5: 3933c4faf5a478d0f9d3963c3b29e5cc - size: 1356 - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 - md5: 56a95874b3e77e7ffec11c00330da5b6 - size: 176 - - path: scripts/sft/fine_tune_llm.py - hash: md5 - md5: 4b77ee1ea604cae18f17ca00cdb6988b - size: 4578 - outs: - - path: data/experiments/fine-tune/Unsloth-Llama-3-8B-Instruct/en-court-instruct/ - hash: md5 - md5: c99c2a68274325db86fbbd41bcc30e78.dir - size: 354395477 - nfiles: 18 - predict@pl-court-instruct-qra-13b-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=qra-13b - random_seed=997 output_file=data/experiments/predict/pl-court-instruct/qra-13b/outputs_997.json - deps: - - path: configs/model/qra-13b.yaml - hash: md5 - md5: ab2baba7b6109364d7e04c77232b0f9d - size: 152 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_997.json - hash: md5 - md5: 72ef8a411b8f5aeb006c99e5868c754d - size: 2252480 - predict_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en - random_seed=7312 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json - deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en.yaml - hash: md5 - md5: 5986ff103292733bff4662585ae5d860 - size: 351 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json - hash: md5 - md5: 4ed8db93aa14f1cc98e276d3989efa9e - size: 642730 - predict_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned-en - random_seed=997 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned-en.yaml - hash: md5 - md5: 67faffd57ec54260d70eb3a89d2ec130 - size: 259 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json - hash: md5 - md5: 4e968cac351ad48ad786d1ecccbbc967 - size: 670674 - predict_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-997: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en - random_seed=997 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json - deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en.yaml - hash: md5 - md5: 5986ff103292733bff4662585ae5d860 - size: 351 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json - hash: md5 - md5: 787c129090aa1b64e337b236a4391402 - size: 642477 - predict@pl-court-instruct-qra-13b-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=qra-13b - random_seed=42 output_file=data/experiments/predict/pl-court-instruct/qra-13b/outputs_42.json - deps: - - path: configs/model/qra-13b.yaml - hash: md5 - md5: ab2baba7b6109364d7e04c77232b0f9d - size: 152 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_42.json - hash: md5 - md5: dd142d2d1c24c499bbe615bf4b74525c - size: 2247396 - predict_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned-en - random_seed=7312 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json - deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned-en.yaml - hash: md5 - md5: 67faffd57ec54260d70eb3a89d2ec130 - size: 259 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json - hash: md5 - md5: f0b806eebca2f3ddf49d0ff821856b45 - size: 670935 - predict_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en - random_seed=42 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json - deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en.yaml - hash: md5 - md5: 5986ff103292733bff4662585ae5d860 - size: 351 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json - hash: md5 - md5: 313fa5a662f37cacae4980a04830f422 - size: 642688 - predict_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-42: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned-en - random_seed=42 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + - hash: md5 + md5: 8c3af9851700f2ff640dd9c8dc92b06d + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_997.json + size: 307 + evaluate_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json + --num-proc=-1 deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned-en.yaml - hash: md5 - md5: 67faffd57ec54260d70eb3a89d2ec130 - size: 259 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 289b719e8c7166e578417e5706bdc4e3 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json + size: 1760355 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json - hash: md5 - md5: 4246a4fafba5e130aac3db6c1c61ce30 - size: 675578 - predict@pl-court-instruct-qra-13b-7312: - cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=qra-13b - random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/qra-13b/outputs_7312.json + - hash: md5 + md5: 35fc5163dfb37097b814afcc79e91074 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/metrics_42.json + size: 304 + evaluate_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json + --num-proc=-1 deps: - - path: configs/model/qra-13b.yaml - hash: md5 - md5: ab2baba7b6109364d7e04c77232b0f9d - size: 152 - - path: configs/predict.yaml - hash: md5 - md5: 5fc8b9ac571d4a2209d7d866697252ab - size: 402 - - path: scripts/sft/predict.py - hash: md5 - md5: f9acd63cd4d682ae2242d7b51f0d974b - size: 3198 + - hash: md5 + md5: 25bee3b4ee09b36d636095b4c927a0d3 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json + size: 1759194 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_7312.json - hash: md5 - md5: fddb307b29b598df3786fc94d479e918 - size: 2254243 - evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + - hash: md5 + md5: f7c13c964cc9e225fa794935cbf6515c + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/metrics_7312.json + size: 305 + evaluate_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 - md5: 761018c0a306fbee63dad2fbc119110d - size: 821683 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 82b2c535d99d91b9a34986375bfa31a9 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json + size: 1758747 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_42.json - hash: md5 - md5: 265776ba10a7b24b66e6bac1131e0c48 - size: 149 - evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + - hash: md5 + md5: 68f0244a7871bae1e8bd0642a0f2c22e + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/metrics_997.json + size: 305 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 - md5: a7361535b440251d6ce6232a15cfcdf2 - size: 818877 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 4a9d3a2bb1dd47a732bd2df8102bc93f + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json + size: 1799957 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_7312.json - hash: md5 - md5: 97fa8dfaa5e57633e8fb6a7d073177f5 - size: 147 - evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + - hash: md5 + md5: 703e92a1c58aca701b128fd28f4697a4 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/metrics_42.json + size: 306 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 - md5: 94924275d576271875fecf22c0f9b39e - size: 817490 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 77e10dd2ec17e12e171e4bcab1a48e08 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json + size: 1795629 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_997.json - hash: md5 - md5: c3552161ec68d8cc6a8e5b75f02e22e2 - size: 147 - evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + - hash: md5 + md5: 6f3c13385fefb9e38f01a42bb210e3f5 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/metrics_7312.json + size: 309 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json - hash: md5 - md5: 4246a4fafba5e130aac3db6c1c61ce30 - size: 675578 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: bbb883aa388b274bef3e9296df26f68f + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json + size: 1795752 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_42.json - hash: md5 - md5: 016d1c87b2925c6f941400d178bee018 - size: 157 - evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json + - hash: md5 + md5: 533df4f640eb0699b5382cc759e0a45d + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/metrics_997.json + size: 310 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json - hash: md5 - md5: f0b806eebca2f3ddf49d0ff821856b45 - size: 670935 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 156091297490d893f9815d2ffcf17cbf + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json + size: 1792160 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_7312.json - hash: md5 - md5: a8459393feb773fea85ede4b831b3fa6 - size: 157 - evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json + - hash: md5 + md5: 89338c144457d5297d844c5f9b341f9f + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/metrics_42.json + size: 307 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json - hash: md5 - md5: 4e968cac351ad48ad786d1ecccbbc967 - size: 670674 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 3d336675e54a706fae45349adbaf6ee4 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json + size: 1793461 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_997.json - hash: md5 - md5: 21bc79aad7ab2e97b75e1d3fb18a2263 - size: 157 - evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + - hash: md5 + md5: 8c6ac6f31de90a1fb08d73d08a8544dc + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/metrics_7312.json + size: 305 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 4fe25ad80a20ea5d6200136176b3e4ca - size: 705218 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 55d682fba1c08c68552e98be6b503b4e + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json + size: 1790731 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_42.json - hash: md5 - md5: 0b2f663a1cbc3ef08c363ec8adc53c15 - size: 151 - evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + - hash: md5 + md5: 9873757edce9412fdee1ef45513f26ac + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/metrics_997.json + size: 307 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: cf4fdbf0e26e6c793bdca4edd6e365c0 - size: 703876 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 1385f49966e9db2a88a17f53d0887ad8 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + size: 1741944 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_7312.json - hash: md5 - md5: 604b5cee14ec6520b88bafecc962e031 - size: 152 - evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + - hash: md5 + md5: 37165fb96f31997589e95108f6c149a3 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_42.json + size: 306 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f - size: 705894 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 924744efce1483e9128579cad7a4454c + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + size: 1748772 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_997.json - hash: md5 - md5: a91ec5b434bebd8ce1d2000e0a033cb9 - size: 152 - evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json + - hash: md5 + md5: 2ea9afa67cf34dc1629a265bce2c3357 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_7312.json + size: 306 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json - hash: md5 - md5: 313fa5a662f37cacae4980a04830f422 - size: 642688 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 4d023797a9053fd7df61f6b1796112e9 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + size: 1747404 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_42.json - hash: md5 - md5: f0d37c5ac017c0e488b7c3bed01c7093 - size: 156 - evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json + - hash: md5 + md5: 122c6d789f343d4e7232d8720b0f577a + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_997.json + size: 303 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json - hash: md5 - md5: 4ed8db93aa14f1cc98e276d3989efa9e - size: 642730 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 14d4613f7d9495f5fb5f2d7b81f402a9 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json + size: 1825646 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_7312.json - hash: md5 - md5: a1521ab06a56258759953bb02ae87e24 - size: 157 - evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json + - hash: md5 + md5: e43e3fe605787204ba1345dedaefd124 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/metrics_42.json + size: 305 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json --num-proc=-1 deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json - hash: md5 - md5: 787c129090aa1b64e337b236a4391402 - size: 642477 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 + md5: 302e1dc4f064007e3df88ac1e8acccc5 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json + size: 1831330 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_997.json - hash: md5 - md5: f3339245ea358de4b1348c8393153946 - size: 157 - evaluate_en@en-court-instruct-open_ai_gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + - hash: md5 + md5: b5d0fa11fe5557bbe1ee9804d5d09cb1 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/metrics_7312.json + size: 303 + evaluate_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json + --num-proc=-1 deps: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 2a0819011b3eac56e497201a9f67e310 - size: 690306 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 + - hash: md5 + md5: 41a47dc56efc29b6c2771db68bdacb17 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json + size: 1822491 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 outs: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/metrics_997.json - hash: md5 - md5: d70eb0821aff9c9e874a421b80f7f697 - size: 155 - evaluate@pl-court-instruct-qra-13b-42: + - hash: md5 + md5: 403fc36606ced0ab31d34d3d548f948e + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/metrics_997.json + size: 303 + evaluate_pl@pl-court-instruct-qra-13b-42: cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/qra-13b/outputs_42.json --num-proc=-1 deps: - - path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_42.json - hash: md5 + - hash: md5 md5: dd142d2d1c24c499bbe615bf4b74525c + path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_42.json size: 2247396 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: data/experiments/predict/pl-court-instruct/qra-13b/metrics_42.json - hash: md5 + - hash: md5 md5: 861d59d796c9957aba2973741fd77d65 + path: data/experiments/predict/pl-court-instruct/qra-13b/metrics_42.json size: 202 - evaluate@pl-court-instruct-qra-13b-7312: + evaluate_pl@pl-court-instruct-qra-13b-7312: cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/qra-13b/outputs_7312.json --num-proc=-1 deps: - - path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_7312.json - hash: md5 + - hash: md5 md5: fddb307b29b598df3786fc94d479e918 + path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_7312.json size: 2254243 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: data/experiments/predict/pl-court-instruct/qra-13b/metrics_7312.json - hash: md5 + - hash: md5 md5: 78daedab21d1748c95b7308b423e6e73 + path: data/experiments/predict/pl-court-instruct/qra-13b/metrics_7312.json size: 201 - evaluate@pl-court-instruct-qra-13b-997: + evaluate_pl@pl-court-instruct-qra-13b-997: cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/qra-13b/outputs_997.json --num-proc=-1 deps: - - path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_997.json - hash: md5 + - hash: md5 md5: 72ef8a411b8f5aeb006c99e5868c754d + path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_997.json size: 2252480 - - path: scripts/sft/evaluate.py - hash: md5 + - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py size: 697 outs: - - path: data/experiments/predict/pl-court-instruct/qra-13b/metrics_997.json - hash: md5 + - hash: md5 md5: c3b7776073786447d84bd5200c39ecb9 + path: data/experiments/predict/pl-court-instruct/qra-13b/metrics_997.json size: 201 + evaluate_pl@pl-court-instruct-trurl-13B-academic-42: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_42.json + --num-proc=-1 + deps: + - hash: md5 + md5: bb571102170940efc73f02143a530d5b + path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_42.json + size: 1289839 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 + outs: + - hash: md5 + md5: 5bb8bd6918ec3d91437d3465cbbee127 + path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/metrics_42.json + size: 311 + evaluate_pl@pl-court-instruct-trurl-13B-academic-7312: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_7312.json + --num-proc=-1 + deps: + - hash: md5 + md5: bcd41ca4629d4cec2440a8ed2f02560f + path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_7312.json + size: 1283974 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 + outs: + - hash: md5 + md5: da990070981d0524aa31916fad80e0eb + path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/metrics_7312.json + size: 313 + evaluate_pl@pl-court-instruct-trurl-13B-academic-997: + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_997.json + --num-proc=-1 + deps: + - hash: md5 + md5: 731cff0eb1484682de211336efeff153 + path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_997.json + size: 1288941 + - hash: md5 + md5: 0644efb76af2c5461185e37a07ba2c17 + path: scripts/sft/evaluate.py + size: 697 + outs: + - hash: md5 + md5: 4d080092735994614eaf7125e5fe5bf2 + path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/metrics_997.json + size: 313 + instruct_dataset_readme: + cmd: jupyter nbconvert --no-input --to markdown --execute nbs/Data/03_Dataset_Description_Instruct.ipynb + --output-dir data/datasets/pl/readme/instruct --output README + deps: + - hash: md5 + md5: 27e6d517445028d45e5c40b22febece4 + path: nbs/Data/03_Dataset_Description_Instruct.ipynb + size: 16215 + outs: + - hash: md5 + md5: de02794df3d74d86f8610f040a17dcbe.dir + nfiles: 5 + path: data/datasets/pl/readme/instruct/ + size: 144326 predict_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-42: cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct - random_seed=42 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + random_seed=42 output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 + - hash: md5 md5: 56a95874b3e77e7ffec11c00330da5b6 + path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml size: 176 - - path: configs/predict.yaml - hash: md5 + - hash: md5 md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml size: 402 - - path: scripts/sft/predict.py - hash: md5 + - hash: md5 md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 + - hash: md5 md5: 761018c0a306fbee63dad2fbc119110d + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json size: 821683 predict_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct - random_seed=7312 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + random_seed=7312 output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 + - hash: md5 md5: 56a95874b3e77e7ffec11c00330da5b6 + path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml size: 176 - - path: configs/predict.yaml - hash: md5 + - hash: md5 md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml size: 402 - - path: scripts/sft/predict.py - hash: md5 + - hash: md5 md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 + - hash: md5 md5: a7361535b440251d6ce6232a15cfcdf2 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json size: 818877 predict_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-997: cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct - random_seed=997 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + random_seed=997 output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json deps: - - path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml - hash: md5 + - hash: md5 md5: 56a95874b3e77e7ffec11c00330da5b6 + path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml size: 176 - - path: configs/predict.yaml - hash: md5 + - hash: md5 md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml size: 402 - - path: scripts/sft/predict.py - hash: md5 + - hash: md5 md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 + - hash: md5 md5: 94924275d576271875fecf22c0f9b39e + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json size: 817490 + predict_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned-en + random_seed=42 output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + deps: + - hash: md5 + md5: 67faffd57ec54260d70eb3a89d2ec130 + path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned-en.yaml + size: 259 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 + md5: 4246a4fafba5e130aac3db6c1c61ce30 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + size: 675578 + predict_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned-en + random_seed=7312 output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json + deps: + - hash: md5 + md5: 67faffd57ec54260d70eb3a89d2ec130 + path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned-en.yaml + size: 259 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 + md5: f0b806eebca2f3ddf49d0ff821856b45 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json + size: 670935 + predict_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned-en + random_seed=997 output_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json + deps: + - hash: md5 + md5: 67faffd57ec54260d70eb3a89d2ec130 + path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned-en.yaml + size: 259 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 + md5: 4e968cac351ad48ad786d1ecccbbc967 + path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json + size: 670674 predict_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=42 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + random_seed=42 output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 + - hash: md5 md5: ca5ac52e503c9f488f98f569811c76dc + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml size: 261 - - path: configs/predict.yaml - hash: md5 + - hash: md5 md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml size: 402 - - path: scripts/sft/predict.py - hash: md5 + - hash: md5 md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 + - hash: md5 md5: 4fe25ad80a20ea5d6200136176b3e4ca + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json size: 705218 predict_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=7312 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + random_seed=7312 output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 + - hash: md5 md5: ca5ac52e503c9f488f98f569811c76dc + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml size: 261 - - path: configs/predict.yaml - hash: md5 + - hash: md5 md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml size: 402 - - path: scripts/sft/predict.py - hash: md5 + - hash: md5 md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 + - hash: md5 md5: cf4fdbf0e26e6c793bdca4edd6e365c0 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json size: 703876 predict_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 - random_seed=997 - output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + random_seed=997 output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json deps: - - path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml - hash: md5 + - hash: md5 md5: ca5ac52e503c9f488f98f569811c76dc + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml size: 261 - - path: configs/predict.yaml - hash: md5 + - hash: md5 md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml size: 402 - - path: scripts/sft/predict.py - hash: md5 + - hash: md5 md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 + - hash: md5 md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json size: 705894 - predict_with_api@pl-court-instruct-gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/predict_with_api.py dataset=pl-court-instruct - model_version=gpt-4o seed=997 - output_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - deps: - - path: configs/predict_with_api.yaml - hash: md5 - md5: aff18078742a14c3d8ce2cd74e718d44 - size: 320 - - path: scripts/sft/predict_with_api.py - hash: md5 - md5: 142508c7b6df391083b0e81a3a6c4795 - size: 3968 - outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 7c5833fdd1419163b286baaa3d71e084 - size: 1965252 - predict_with_api@pl-court-instruct-gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/predict_with_api.py dataset=pl-court-instruct - model_version=gpt-4o-mini seed=997 - output_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - deps: - - path: configs/predict_with_api.yaml - hash: md5 - md5: aff18078742a14c3d8ce2cd74e718d44 - size: 320 - - path: scripts/sft/predict_with_api.py - hash: md5 - md5: 142508c7b6df391083b0e81a3a6c4795 - size: 3968 - outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 839c911f542cd7c60c9ae52ef95e9907 - size: 1812429 - predict_with_api@en-court-instruct-gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/predict_with_api.py dataset=en-court-instruct - model_version=gpt-4o seed=997 - output_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json + predict_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en + random_seed=42 output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json deps: - - path: configs/predict_with_api.yaml - hash: md5 - md5: aff18078742a14c3d8ce2cd74e718d44 - size: 320 - - path: scripts/sft/predict_with_api.py - hash: md5 - md5: 142508c7b6df391083b0e81a3a6c4795 - size: 3968 + - hash: md5 + md5: 5986ff103292733bff4662585ae5d860 + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en.yaml + size: 351 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 8f70e2baa0b0ae8a320577f5c8a60011 - size: 679432 - evaluate@pl-court-instruct-open_ai_gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + - hash: md5 + md5: 313fa5a662f37cacae4980a04830f422 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json + size: 642688 + predict_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en + random_seed=7312 output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json deps: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 839c911f542cd7c60c9ae52ef95e9907 - size: 1812429 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 + - hash: md5 + md5: 5986ff103292733bff4662585ae5d860 + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en.yaml + size: 351 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/metrics_997.json - hash: md5 - md5: fe43f0d25b500a0f2fb2d8199b8034fd - size: 305 - evaluate@pl-court-instruct-open_ai_gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json + - hash: md5 + md5: 4ed8db93aa14f1cc98e276d3989efa9e + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json + size: 642730 + predict_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=en-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en + random_seed=997 output_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json deps: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 7c5833fdd1419163b286baaa3d71e084 - size: 1965252 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 + - hash: md5 + md5: 5986ff103292733bff4662585ae5d860 + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en.yaml + size: 351 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/metrics_997.json - hash: md5 - md5: 65c808d4aebd8efe37b94a5128a19de6 - size: 306 - evaluate_en@en-court-instruct-open_ai_gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json + - hash: md5 + md5: 787c129090aa1b64e337b236a4391402 + path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json + size: 642477 + predict_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-11B-v2.2-Instruct + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json deps: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 8f70e2baa0b0ae8a320577f5c8a60011 - size: 679432 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 73aa4a7eb8a035c087702457b9401654 - size: 636 + - hash: md5 + md5: 1cfb3fbe30fac3e07a30339e6bf197c9 + path: configs/model/Bielik-11B-v2.2-Instruct.yaml + size: 175 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/metrics_997.json - hash: md5 - md5: ac30bcf3c40000cab61e0914b56aba85 - size: 157 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json - prompt=pl + - hash: md5 + md5: c3e404c898e3e193ac3aa910187b4f9f + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json + size: 1734129 + predict_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-11B-v2.2-Instruct + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 - md5: e99c88720116c951087b6125e5f4be4d - size: 2008073 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 1cfb3fbe30fac3e07a30339e6bf197c9 + path: configs/model/Bielik-11B-v2.2-Instruct.yaml + size: 175 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json - hash: md5 - md5: 9d9fba0cf2169e9dd9f69579a2182b8e - size: 1172 - predict@pl-court-instruct-Bielik-11B-v2.2-Instruct-997: + - hash: md5 + md5: d4a2ab2393a58f0d7e1897859eccb626 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json + size: 1734772 + predict_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-997: cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-11B-v2.2-Instruct - random_seed=997 - output_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json deps: - - path: configs/model/Bielik-11B-v2.2-Instruct.yaml - hash: md5 + - hash: md5 md5: 1cfb3fbe30fac3e07a30339e6bf197c9 + path: configs/model/Bielik-11B-v2.2-Instruct.yaml size: 175 - - path: configs/predict.yaml - hash: md5 + - hash: md5 md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml size: 402 - - path: scripts/sft/predict.py - hash: md5 + - hash: md5 md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json - hash: md5 + - hash: md5 md5: 8f4f6bc97e33b3b2728bebb7620a4968 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json size: 1731689 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json - prompt=pl + predict_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-11B-v2.2-Instruct-fine-tuned + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 - md5: 4c25368aacb7402b1b2cae9368d187d1 - size: 2013637 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: b72e852654399c31589e5368e554cbfb + path: configs/model/Bielik-11B-v2.2-Instruct-fine-tuned.yaml + size: 256 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json - hash: md5 - md5: e58171fc082d33c84497a13dabcf766c - size: 1167 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json - prompt=pl + - hash: md5 + md5: dfd5d7389b312686428cc967aea5a5b9 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json + size: 1860743 + predict_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-11B-v2.2-Instruct-fine-tuned + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 - md5: baef589507248af212aaae51602fd999 - size: 2010150 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: b72e852654399c31589e5368e554cbfb + path: configs/model/Bielik-11B-v2.2-Instruct-fine-tuned.yaml + size: 256 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json - hash: md5 - md5: f8d16a5298fabe288486822779470cd8 - size: 1165 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - hash: md5 - md5: 289b719e8c7166e578417e5706bdc4e3 - size: 1760355 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_42.json - hash: md5 - md5: 70398042d030309e7e0bc7ba927136f3 - size: 1167 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_7312.json + - hash: md5 + md5: 8fa2faeda5a577c06cd6bf35b8702330 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json + size: 1857569 + predict_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-11B-v2.2-Instruct-fine-tuned + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - hash: md5 - md5: 25bee3b4ee09b36d636095b4c927a0d3 - size: 1759194 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_7312.json - hash: md5 - md5: 9d22089c8d23bbc5a028c748e5522c23 - size: 1157 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - hash: md5 - md5: 82b2c535d99d91b9a34986375bfa31a9 - size: 1758747 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_997.json - hash: md5 - md5: 4222d5b165de8a3a89d71d6519b71b76 - size: 1170 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 1385f49966e9db2a88a17f53d0887ad8 - size: 1741944 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json - hash: md5 - md5: f4bac633a65afde9bf5612f35c3089bb - size: 1170 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: 924744efce1483e9128579cad7a4454c - size: 1748772 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json - hash: md5 - md5: 1f95777ef87a547fa7a41dc597adfc39 - size: 1166 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 4d023797a9053fd7df61f6b1796112e9 - size: 1747404 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json - hash: md5 - md5: de3f557dfdf3440262e4d8f811e526ca - size: 1167 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_42.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - hash: md5 - md5: 14d4613f7d9495f5fb5f2d7b81f402a9 - size: 1825646 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_42.json - hash: md5 - md5: e8cff190991ee3164825dbf7eca03d12 - size: 1170 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - hash: md5 - md5: 302e1dc4f064007e3df88ac1e8acccc5 - size: 1831330 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_7312.json - hash: md5 - md5: aee4a08e0a4d0398b34a2587c039244d - size: 1169 - evaluate_llm_as_judge@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - hash: md5 - md5: 41a47dc56efc29b6c2771db68bdacb17 - size: 1822491 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_997.json - hash: md5 - md5: aac703269b10c85d1a2b5303c22ca077 - size: 1168 - evaluate_llm_as_judge@gpt_4o_mini-Bielik-7B-Instruct-v0.1-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_42.json + - hash: md5 + md5: b72e852654399c31589e5368e554cbfb + path: configs/model/Bielik-11B-v2.2-Instruct-fine-tuned.yaml + size: 256 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 + md5: ba53d76f701eddb60a182de49d992878 + path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json + size: 1857855 + predict_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1 + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - hash: md5 + - hash: md5 + md5: c3412525e9819b53fbad06363a07a871 + path: configs/model/Bielik-7B-Instruct-v0.1.yaml + size: 173 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 md5: 2dc39513a04910c5d0c54380166639d9 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json size: 2029644 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_42.json - hash: md5 - md5: 243da4df07c6dfb5199b925e3f5c07aa - size: 1137 - evaluate_llm_as_judge@gpt_4o_mini-Bielik-7B-Instruct-v0.1-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_7312.json + predict_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1 + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - hash: md5 + - hash: md5 + md5: c3412525e9819b53fbad06363a07a871 + path: configs/model/Bielik-7B-Instruct-v0.1.yaml + size: 173 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 md5: ae39bf31296ffe82c0f6a3e8c9ff63aa + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json size: 2014399 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_7312.json - hash: md5 - md5: 8098cc937d57455ca47d32c3449159a3 - size: 1129 - evaluate_llm_as_judge@gpt_4o_mini-Bielik-7B-Instruct-v0.1-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_997.json + predict_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1 + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - hash: md5 + - hash: md5 + md5: c3412525e9819b53fbad06363a07a871 + path: configs/model/Bielik-7B-Instruct-v0.1.yaml + size: 173 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 md5: fac04d78ad020b50f79fc7277a037e8e + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json size: 2016400 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_997.json - hash: md5 - md5: f1390b2d50893a17c90fc277dc363d6a - size: 1139 - evaluate_llm_as_judge@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_42.json + predict_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1-fine-tuned + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - hash: md5 + - hash: md5 + md5: 2d9590869dfe247d7c6335f3cd7dd0c2 + path: configs/model/Bielik-7B-Instruct-v0.1-fine-tuned.yaml + size: 253 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 md5: 178eb0649617d4a698da6c9e315e84c5 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json size: 2034749 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_42.json - hash: md5 - md5: 302b957707520fa327d1da0edf18baa3 - size: 1167 - evaluate_llm_as_judge@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_7312.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - hash: md5 - md5: 743ea22448bc73a7a991da075fca8841 - size: 2031343 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_7312.json - hash: md5 - md5: 789f0906846251d3f0cab78d111f9c56 - size: 1163 - evaluate_llm_as_judge@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_997.json - deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - hash: md5 - md5: 433a4b2aa7870a134277a265d099a588 - size: 2029482 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_997.json - hash: md5 - md5: 90f3ed04ef29c5cd29b7ec8f02a780a1 - size: 1163 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json - prompt=en - deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 - md5: 94924275d576271875fecf22c0f9b39e - size: 817490 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json - hash: md5 - md5: 4395c32931d25a1bd9aa092c5a0e5460 - size: 478 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_997.json - prompt=en - deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json - hash: md5 - md5: 4e968cac351ad48ad786d1ecccbbc967 - size: 670674 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_997.json - hash: md5 - md5: 90c2b0cd132130d0b9d3a60bf6fdd69b - size: 486 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json - prompt=en - deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f - size: 705894 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json - hash: md5 - md5: 860b5c00ace1f2967db9b5a977cfc3ad - size: 478 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_997.json - prompt=en + predict_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1-fine-tuned + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json - hash: md5 - md5: 787c129090aa1b64e337b236a4391402 - size: 642477 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 - outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_997.json - hash: md5 - md5: 34de8eabaebe6a96b4b664b664f222e2 - size: 484 - summarize_metrics@data/experiments/predict/en-court-instruct: - cmd: PYTHONPATH=. python scripts/sft/summarize_metrics.py --root-dir data/experiments/predict/en-court-instruct + - hash: md5 + md5: 2d9590869dfe247d7c6335f3cd7dd0c2 + path: configs/model/Bielik-7B-Instruct-v0.1-fine-tuned.yaml + size: 253 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 + md5: 743ea22448bc73a7a991da075fca8841 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json + size: 2031343 + predict_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Bielik-7B-Instruct-v0.1-fine-tuned + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json deps: - - path: scripts/sft/summarize_metrics.py - hash: md5 - md5: eb5736f5709f9773acf21bfc28c2e012 - size: 2975 + - hash: md5 + md5: 2d9590869dfe247d7c6335f3cd7dd0c2 + path: configs/model/Bielik-7B-Instruct-v0.1-fine-tuned.yaml + size: 253 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: data/experiments/predict/en-court-instruct/metrics_judge_summary.md - hash: md5 - md5: 6065f2fbff28ab7439d35ddfe03b1938 - size: 4857 - - path: data/experiments/predict/en-court-instruct/metrics_ngram_summary.md - hash: md5 - md5: 1bb66cbd940bd2288f69fbe490465aaa - size: 1031 - evaluate_api_models@en-court-instruct-open_ai_gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json - --num-proc=-1 + - hash: md5 + md5: 433a4b2aa7870a134277a265d099a588 + path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json + size: 2029482 + predict_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json deps: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 8f70e2baa0b0ae8a320577f5c8a60011 - size: 679432 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 56a95874b3e77e7ffec11c00330da5b6 + path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml + size: 176 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/metrics_997.json - hash: md5 - md5: ac30bcf3c40000cab61e0914b56aba85 - size: 157 - evaluate_api_models@en-court-instruct-open_ai_gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - --num-proc=-1 + - hash: md5 + md5: e99c88720116c951087b6125e5f4be4d + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + size: 2008073 + predict_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json deps: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 2a0819011b3eac56e497201a9f67e310 - size: 690306 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 56a95874b3e77e7ffec11c00330da5b6 + path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml + size: 176 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/metrics_997.json - hash: md5 - md5: d70eb0821aff9c9e874a421b80f7f697 - size: 155 - evaluate_api_models@pl-court-instruct-open_ai_gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - --num-proc=-1 + - hash: md5 + md5: 4c25368aacb7402b1b2cae9368d187d1 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + size: 2013637 + predict_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json deps: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 7c5833fdd1419163b286baaa3d71e084 - size: 1965252 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 56a95874b3e77e7ffec11c00330da5b6 + path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml + size: 176 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/metrics_997.json - hash: md5 - md5: 65c808d4aebd8efe37b94a5128a19de6 - size: 306 - evaluate_api_models@pl-court-instruct-open_ai_gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - --num-proc=-1 + - hash: md5 + md5: baef589507248af212aaae51602fd999 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + size: 2010150 + predict_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json deps: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 839c911f542cd7c60c9ae52ef95e9907 - size: 1812429 - - path: scripts/sft/evaluate.py - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - size: 697 + - hash: md5 + md5: 3906c39a5c516f89ddafb7eff21615cd + path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned.yaml + size: 275 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/metrics_997.json - hash: md5 - md5: fe43f0d25b500a0f2fb2d8199b8034fd - size: 305 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json - prompt=en + - hash: md5 + md5: 289b719e8c7166e578417e5706bdc4e3 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json + size: 1760355 + predict_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 - md5: 761018c0a306fbee63dad2fbc119110d - size: 821683 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3906c39a5c516f89ddafb7eff21615cd + path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned.yaml + size: 275 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json - hash: md5 - md5: 77ecbff8c82afbfd6fec098fb87e1218 - size: 478 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json - prompt=en + - hash: md5 + md5: 25bee3b4ee09b36d636095b4c927a0d3 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json + size: 1759194 + predict_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Llama-3-8B-Instruct-fine-tuned + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 - md5: a7361535b440251d6ce6232a15cfcdf2 - size: 818877 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3906c39a5c516f89ddafb7eff21615cd + path: configs/model/Unsloth-Llama-3-8B-Instruct-fine-tuned.yaml + size: 275 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json - hash: md5 - md5: f25c9ad98ef817e976def98d6b7d3b5d - size: 482 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_42.json - prompt=en + - hash: md5 + md5: 82b2c535d99d91b9a34986375bfa31a9 + path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json + size: 1758747 + predict_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3 + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json - hash: md5 - md5: 4246a4fafba5e130aac3db6c1c61ce30 - size: 675578 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: d184e20107315876e7751bdc7c3841ad + path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml + size: 182 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_42.json - hash: md5 - md5: 5f2cea81c873a3b85ef95ba9a6dc90a5 - size: 487 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_7312.json - prompt=en + - hash: md5 + md5: 4a9d3a2bb1dd47a732bd2df8102bc93f + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_42.json + size: 1799957 + predict_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3 + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json - hash: md5 - md5: f0b806eebca2f3ddf49d0ff821856b45 - size: 670935 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: d184e20107315876e7751bdc7c3841ad + path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml + size: 182 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_7312.json - hash: md5 - md5: 5cc45cac8a7607e42a8a394593d33396 - size: 486 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json - prompt=en + - hash: md5 + md5: 77e10dd2ec17e12e171e4bcab1a48e08 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_7312.json + size: 1795629 + predict_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3 + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 4fe25ad80a20ea5d6200136176b3e4ca - size: 705218 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: d184e20107315876e7751bdc7c3841ad + path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml + size: 182 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json - hash: md5 - md5: 69901f631da4ffefd09e7cbfac39cd89 - size: 480 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json - prompt=en + - hash: md5 + md5: bbb883aa388b274bef3e9296df26f68f + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3/outputs_997.json + size: 1795752 + predict_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: cf4fdbf0e26e6c793bdca4edd6e365c0 - size: 703876 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 828382dc03dbed80cff4a3358321dc4a + path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml + size: 271 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json - hash: md5 - md5: 860b5c00ace1f2967db9b5a977cfc3ad - size: 478 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_42.json - prompt=en + - hash: md5 + md5: 156091297490d893f9815d2ffcf17cbf + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_42.json + size: 1792160 + predict_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json - hash: md5 - md5: 313fa5a662f37cacae4980a04830f422 - size: 642688 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 828382dc03dbed80cff4a3358321dc4a + path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml + size: 271 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_42.json - hash: md5 - md5: 974e972a09d844a77840029d642e8077 - size: 486 - evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json - out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_7312.json - prompt=en + - hash: md5 + md5: 3d336675e54a706fae45349adbaf6ee4 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_7312.json + size: 1793461 + predict_pl@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json deps: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json - hash: md5 - md5: 4ed8db93aa14f1cc98e276d3989efa9e - size: 642730 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 828382dc03dbed80cff4a3358321dc4a + path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.yaml + size: 271 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_7312.json - hash: md5 - md5: 8a9712eb10a8da99d86bab8968fd3207 - size: 485 - evaluate_llm_as_judge_api_models@pl-court-instruct-gpt_4o_mini-open_ai_gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/judge_metrics_997.json + - hash: md5 + md5: 55d682fba1c08c68552e98be6b503b4e + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned/outputs_997.json + size: 1790731 + predict_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json deps: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 7c5833fdd1419163b286baaa3d71e084 - size: 1965252 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 + - hash: md5 + md5: ca5ac52e503c9f488f98f569811c76dc + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml + size: 261 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/judge_metrics_997.json - hash: md5 - md5: 867f10aeb55a3bd46b08c8a75c3bfc60 - size: 1176 - evaluate_llm_as_judge_api_models@pl-court-instruct-gpt_4o_mini-open_ai_gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json + - hash: md5 + md5: 1385f49966e9db2a88a17f53d0887ad8 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + size: 1741944 + predict_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json deps: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 839c911f542cd7c60c9ae52ef95e9907 - size: 1812429 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json - hash: md5 - md5: 24037233e5abe74fe13f69dd4fc5e26a - size: 1173 - evaluate_llm_as_judge_api_models@en-court-instruct-gpt_4o_mini-open_ai_gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json - out_metric_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o/judge_metrics_997.json + - hash: md5 + md5: ca5ac52e503c9f488f98f569811c76dc + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml + size: 261 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 + md5: 924744efce1483e9128579cad7a4454c + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + size: 1748772 + predict_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407 + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json deps: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 8f70e2baa0b0ae8a320577f5c8a60011 - size: 679432 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/judge_metrics_997.json - hash: md5 - md5: 41921cec37a7e162f73e7a0d1e106eb1 - size: 482 - evaluate_llm_as_judge_api_models@en-court-instruct-gpt_4o_mini-open_ai_gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - out_metric_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json + - hash: md5 + md5: ca5ac52e503c9f488f98f569811c76dc + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml + size: 261 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 + md5: 4d023797a9053fd7df61f6b1796112e9 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + size: 1747404 + predict_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json deps: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 2a0819011b3eac56e497201a9f67e310 - size: 690306 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 1e556a79b0f9cf0a9cfdb23ed8077bcc - size: 2172 - outs: - - path: - data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json - hash: md5 - md5: 4edc8fe239f53890d71291f61b6cc96c - size: 486 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json - prompt=pl + - hash: md5 + md5: 1d9e6407d121214f949d56ca5c3425f5 + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned.yaml + size: 367 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 + outs: + - hash: md5 + md5: 14d4613f7d9495f5fb5f2d7b81f402a9 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json + size: 1825646 + predict_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - hash: md5 - md5: e99c88720116c951087b6125e5f4be4d - size: 2008073 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 1d9e6407d121214f949d56ca5c3425f5 + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned.yaml + size: 367 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json - hash: md5 - md5: 9d9fba0cf2169e9dd9f69579a2182b8e - size: 1172 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json - prompt=pl + - hash: md5 + md5: 302e1dc4f064007e3df88ac1e8acccc5 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json + size: 1831330 + predict_pl@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json - hash: md5 - md5: 4c25368aacb7402b1b2cae9368d187d1 - size: 2013637 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 1d9e6407d121214f949d56ca5c3425f5 + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned.yaml + size: 367 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json - hash: md5 - md5: e58171fc082d33c84497a13dabcf766c - size: 1167 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json - prompt=pl + - hash: md5 + md5: 41a47dc56efc29b6c2771db68bdacb17 + path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json + size: 1822491 + predict_pl@pl-court-instruct-qra-13b-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=qra-13b + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/qra-13b/outputs_42.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json - hash: md5 - md5: baef589507248af212aaae51602fd999 - size: 2010150 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: ab2baba7b6109364d7e04c77232b0f9d + path: configs/model/qra-13b.yaml + size: 152 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json - hash: md5 - md5: f8d16a5298fabe288486822779470cd8 - size: 1165 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_42.json - prompt=pl + - hash: md5 + md5: dd142d2d1c24c499bbe615bf4b74525c + path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_42.json + size: 2247396 + predict_pl@pl-court-instruct-qra-13b-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=qra-13b + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/qra-13b/outputs_7312.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json - hash: md5 - md5: 289b719e8c7166e578417e5706bdc4e3 - size: 1760355 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: ab2baba7b6109364d7e04c77232b0f9d + path: configs/model/qra-13b.yaml + size: 152 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_42.json - hash: md5 - md5: 70398042d030309e7e0bc7ba927136f3 - size: 1167 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_7312.json - prompt=pl + - hash: md5 + md5: fddb307b29b598df3786fc94d479e918 + path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_7312.json + size: 2254243 + predict_pl@pl-court-instruct-qra-13b-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=qra-13b + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/qra-13b/outputs_997.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json - hash: md5 - md5: 25bee3b4ee09b36d636095b4c927a0d3 - size: 1759194 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: ab2baba7b6109364d7e04c77232b0f9d + path: configs/model/qra-13b.yaml + size: 152 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_7312.json - hash: md5 - md5: 9d22089c8d23bbc5a028c748e5522c23 - size: 1157 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_997.json - prompt=pl + - hash: md5 + md5: 72ef8a411b8f5aeb006c99e5868c754d + path: data/experiments/predict/pl-court-instruct/qra-13b/outputs_997.json + size: 2252480 + predict_pl@pl-court-instruct-trurl-13B-academic-42: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=trurl-13B-academic + random_seed=42 output_file=data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_42.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json - hash: md5 - md5: 82b2c535d99d91b9a34986375bfa31a9 - size: 1758747 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3aa3ce4fc9a1958bef82b8dbfd44ab6b + path: configs/model/trurl-13B-academic.yaml + size: 168 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_997.json - hash: md5 - md5: 4222d5b165de8a3a89d71d6519b71b76 - size: 1170 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json - prompt=pl + - hash: md5 + md5: bb571102170940efc73f02143a530d5b + path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_42.json + size: 1289839 + predict_pl@pl-court-instruct-trurl-13B-academic-7312: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=trurl-13B-academic + random_seed=7312 output_file=data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_7312.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json - hash: md5 - md5: 1385f49966e9db2a88a17f53d0887ad8 - size: 1741944 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3aa3ce4fc9a1958bef82b8dbfd44ab6b + path: configs/model/trurl-13B-academic.yaml + size: 168 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json - hash: md5 - md5: f4bac633a65afde9bf5612f35c3089bb - size: 1170 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json - prompt=pl + - hash: md5 + md5: bcd41ca4629d4cec2440a8ed2f02560f + path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_7312.json + size: 1283974 + predict_pl@pl-court-instruct-trurl-13B-academic-997: + cmd: PYTHONPATH=. python scripts/sft/predict.py dataset=pl-court-instruct model=trurl-13B-academic + random_seed=997 output_file=data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_997.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json - hash: md5 - md5: 924744efce1483e9128579cad7a4454c - size: 1748772 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3aa3ce4fc9a1958bef82b8dbfd44ab6b + path: configs/model/trurl-13B-academic.yaml + size: 168 + - hash: md5 + md5: 5fc8b9ac571d4a2209d7d866697252ab + path: configs/predict.yaml + size: 402 + - hash: md5 + md5: f9acd63cd4d682ae2242d7b51f0d974b + path: scripts/sft/predict.py + size: 3198 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json - hash: md5 - md5: 1f95777ef87a547fa7a41dc597adfc39 - size: 1166 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json - prompt=pl + - hash: md5 + md5: 731cff0eb1484682de211336efeff153 + path: data/experiments/predict/pl-court-instruct/trurl-13B-academic/outputs_997.json + size: 1288941 + predict_with_api@en-court-instruct-gpt-4o-997: + cmd: PYTHONPATH=. python scripts/sft/predict_with_api.py dataset=en-court-instruct + model_version=gpt-4o seed=997 output_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json - hash: md5 - md5: 4d023797a9053fd7df61f6b1796112e9 - size: 1747404 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: aff18078742a14c3d8ce2cd74e718d44 + path: configs/predict_with_api.yaml + size: 320 + - hash: md5 + md5: 142508c7b6df391083b0e81a3a6c4795 + path: scripts/sft/predict_with_api.py + size: 3968 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json - hash: md5 - md5: de3f557dfdf3440262e4d8f811e526ca - size: 1167 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_42.json - prompt=pl + - hash: md5 + md5: 8f70e2baa0b0ae8a320577f5c8a60011 + path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json + size: 679432 + predict_with_api@en-court-instruct-gpt-4o-mini-997: + cmd: PYTHONPATH=. python scripts/sft/predict_with_api.py dataset=en-court-instruct + model_version=gpt-4o-mini seed=997 output_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json - hash: md5 - md5: 14d4613f7d9495f5fb5f2d7b81f402a9 - size: 1825646 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: aff18078742a14c3d8ce2cd74e718d44 + path: configs/predict_with_api.yaml + size: 320 + - hash: md5 + md5: 142508c7b6df391083b0e81a3a6c4795 + path: scripts/sft/predict_with_api.py + size: 3968 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_42.json - hash: md5 - md5: e8cff190991ee3164825dbf7eca03d12 - size: 1170 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_7312.json - prompt=pl + - hash: md5 + md5: 2a0819011b3eac56e497201a9f67e310 + path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + size: 690306 + predict_with_api@pl-court-instruct-gpt-4o-997: + cmd: PYTHONPATH=. python scripts/sft/predict_with_api.py dataset=pl-court-instruct + model_version=gpt-4o seed=997 output_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json - hash: md5 - md5: 302e1dc4f064007e3df88ac1e8acccc5 - size: 1831330 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: aff18078742a14c3d8ce2cd74e718d44 + path: configs/predict_with_api.yaml + size: 320 + - hash: md5 + md5: 142508c7b6df391083b0e81a3a6c4795 + path: scripts/sft/predict_with_api.py + size: 3968 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_7312.json - hash: md5 - md5: aee4a08e0a4d0398b34a2587c039244d - size: 1169 - evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_997.json - prompt=pl + - hash: md5 + md5: 7c5833fdd1419163b286baaa3d71e084 + path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json + size: 1965252 + predict_with_api@pl-court-instruct-gpt-4o-mini-997: + cmd: PYTHONPATH=. python scripts/sft/predict_with_api.py dataset=pl-court-instruct + model_version=gpt-4o-mini seed=997 output_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json deps: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json - hash: md5 - md5: 41a47dc56efc29b6c2771db68bdacb17 - size: 1822491 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: aff18078742a14c3d8ce2cd74e718d44 + path: configs/predict_with_api.yaml + size: 320 + - hash: md5 + md5: 142508c7b6df391083b0e81a3a6c4795 + path: scripts/sft/predict_with_api.py + size: 3968 outs: - - path: - data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_997.json - hash: md5 - md5: aac703269b10c85d1a2b5303c22ca077 - size: 1168 - evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_42.json - prompt=pl + - hash: md5 + md5: 839c911f542cd7c60c9ae52ef95e9907 + path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json + size: 1812429 + raw_dataset_readme: + cmd: jupyter nbconvert --no-input --to markdown --execute 'nbs/Dataset Cards/01_Dataset_Description_Raw.ipynb' + --output-dir data/datasets/pl/readme/raw --output README deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - hash: md5 - md5: 2dc39513a04910c5d0c54380166639d9 - size: 2029644 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 622ba21868561c26fb6877ad95bfb5c5.dir + nfiles: 17 + path: data/datasets/pl/raw + size: 10234505621 + - hash: md5 + md5: 11b39233ef419de713493cb5ec8bcfd9 + path: nbs/Dataset Cards/01_Dataset_Description_Raw.ipynb + size: 77118 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_42.json - hash: md5 - md5: 243da4df07c6dfb5199b925e3f5c07aa - size: 1137 - evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_7312.json - prompt=pl + - hash: md5 + md5: c82b8238e3043491c6fa49e9641e8dac.dir + nfiles: 8 + path: data/datasets/pl/readme/raw/ + size: 475420 + sft_unsloth@en-court-instruct-Unsloth-Llama-3-8B-Instruct: + cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=en-court-instruct + model=Unsloth-Llama-3-8B-Instruct deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - hash: md5 - md5: ae39bf31296ffe82c0f6a3e8c9ff63aa - size: 2014399 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3933c4faf5a478d0f9d3963c3b29e5cc + path: configs/fine_tuning.yaml + size: 1356 + - hash: md5 + md5: 56a95874b3e77e7ffec11c00330da5b6 + path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml + size: 176 + - hash: md5 + md5: 4b77ee1ea604cae18f17ca00cdb6988b + path: scripts/sft/fine_tune_llm.py + size: 4578 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_7312.json - hash: md5 - md5: 8098cc937d57455ca47d32c3449159a3 - size: 1129 - evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_997.json - prompt=pl + - hash: md5 + md5: c99c2a68274325db86fbbd41bcc30e78.dir + nfiles: 18 + path: data/experiments/fine-tune/Unsloth-Llama-3-8B-Instruct/en-court-instruct/ + size: 354395477 + sft_unsloth@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407: + cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=en-court-instruct + model=Unsloth-Mistral-Nemo-Instruct-2407 deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - hash: md5 - md5: fac04d78ad020b50f79fc7277a037e8e - size: 2016400 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3933c4faf5a478d0f9d3963c3b29e5cc + path: configs/fine_tuning.yaml + size: 1356 + - hash: md5 + md5: ca5ac52e503c9f488f98f569811c76dc + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml + size: 261 + - hash: md5 + md5: 4b77ee1ea604cae18f17ca00cdb6988b + path: scripts/sft/fine_tune_llm.py + size: 4578 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_997.json - hash: md5 - md5: f1390b2d50893a17c90fc277dc363d6a - size: 1139 - evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_42.json - prompt=pl + - hash: md5 + md5: 4c4f973ee0648610fc4b696059fae47a.dir + nfiles: 18 + path: data/experiments/fine-tune/Unsloth-Mistral-Nemo-Instruct-2407/en-court-instruct/ + size: 475726484 + sft_unsloth@pl-court-instruct-Bielik-11B-v2.2-Instruct: + cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=pl-court-instruct + model=Bielik-11B-v2.2-Instruct deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - hash: md5 - md5: 178eb0649617d4a698da6c9e315e84c5 - size: 2034749 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3933c4faf5a478d0f9d3963c3b29e5cc + path: configs/fine_tuning.yaml + size: 1356 + - hash: md5 + md5: 1cfb3fbe30fac3e07a30339e6bf197c9 + path: configs/model/Bielik-11B-v2.2-Instruct.yaml + size: 175 + - hash: md5 + md5: 4b77ee1ea604cae18f17ca00cdb6988b + path: scripts/sft/fine_tune_llm.py + size: 4578 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_42.json - hash: md5 - md5: 302b957707520fa327d1da0edf18baa3 - size: 1167 - evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_7312.json - prompt=pl + - hash: md5 + md5: 6acbafad481c0a8d3e6a989aa50dce46.dir + nfiles: 39 + path: data/experiments/fine-tune/Bielik-11B-v2.2-Instruct/pl-court-instruct/ + size: 1189374238 + sft_unsloth@pl-court-instruct-Bielik-7B-Instruct-v0.1: + cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=pl-court-instruct + model=Bielik-7B-Instruct-v0.1 deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - hash: md5 - md5: 743ea22448bc73a7a991da075fca8841 - size: 2031343 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3933c4faf5a478d0f9d3963c3b29e5cc + path: configs/fine_tuning.yaml + size: 1356 + - hash: md5 + md5: c3412525e9819b53fbad06363a07a871 + path: configs/model/Bielik-7B-Instruct-v0.1.yaml + size: 173 + - hash: md5 + md5: 4b77ee1ea604cae18f17ca00cdb6988b + path: scripts/sft/fine_tune_llm.py + size: 4578 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_7312.json - hash: md5 - md5: 789f0906846251d3f0cab78d111f9c56 - size: 1163 - evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_997.json - prompt=pl + - hash: md5 + md5: be61ab5ea1365c1bcf908952bc015ab4.dir + nfiles: 108 + path: data/experiments/fine-tune/Bielik-7B-Instruct-v0.1/pl-court-instruct/ + size: 2293711014 + sft_unsloth@pl-court-instruct-Unsloth-Llama-3-8B-Instruct: + cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=pl-court-instruct + model=Unsloth-Llama-3-8B-Instruct deps: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - hash: md5 - md5: 433a4b2aa7870a134277a265d099a588 - size: 2029482 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3933c4faf5a478d0f9d3963c3b29e5cc + path: configs/fine_tuning.yaml + size: 1356 + - hash: md5 + md5: 56a95874b3e77e7ffec11c00330da5b6 + path: configs/model/Unsloth-Llama-3-8B-Instruct.yaml + size: 176 + - hash: md5 + md5: 4b77ee1ea604cae18f17ca00cdb6988b + path: scripts/sft/fine_tune_llm.py + size: 4578 outs: - - path: - data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_997.json - hash: md5 - md5: 90f3ed04ef29c5cd29b7ec8f02a780a1 - size: 1163 - evaluate_llm_as_judge_api_models@pl-gpt_4o_mini-open_ai_gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/judge_metrics_997.json - prompt=pl + - hash: md5 + md5: d9850d30d221f257e1453a66a6c1eef3.dir + nfiles: 33 + path: data/experiments/fine-tune/Unsloth-Llama-3-8B-Instruct/pl-court-instruct/ + size: 784320233 + sft_unsloth@pl-court-instruct-Unsloth-Mistral-7B-Instruct-v0.3: + cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=pl-court-instruct + model=Unsloth-Mistral-7B-Instruct-v0.3 deps: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 7c5833fdd1419163b286baaa3d71e084 - size: 1965252 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3933c4faf5a478d0f9d3963c3b29e5cc + path: configs/fine_tuning.yaml + size: 1356 + - hash: md5 + md5: d184e20107315876e7751bdc7c3841ad + path: configs/model/Unsloth-Mistral-7B-Instruct-v0.3.yaml + size: 182 + - hash: md5 + md5: 4b77ee1ea604cae18f17ca00cdb6988b + path: scripts/sft/fine_tune_llm.py + size: 4578 outs: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/judge_metrics_997.json - hash: md5 - md5: 867f10aeb55a3bd46b08c8a75c3bfc60 - size: 1176 - evaluate_llm_as_judge_api_models@pl-gpt_4o_mini-open_ai_gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - out_metric_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json - prompt=pl + - hash: md5 + md5: 1b47e8203c533942e1903dd816f7a7f7.dir + nfiles: 66 + path: data/experiments/fine-tune/Unsloth-Mistral-7B-Instruct-v0.3/pl-court-instruct/ + size: 1518954466 + sft_unsloth@pl-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407: + cmd: PYTHONPATH=. python scripts/sft/fine_tune_llm.py dataset=pl-court-instruct + model=Unsloth-Mistral-Nemo-Instruct-2407 deps: - - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 839c911f542cd7c60c9ae52ef95e9907 - size: 1812429 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: 3933c4faf5a478d0f9d3963c3b29e5cc + path: configs/fine_tuning.yaml + size: 1356 + - hash: md5 + md5: ca5ac52e503c9f488f98f569811c76dc + path: configs/model/Unsloth-Mistral-Nemo-Instruct-2407.yaml + size: 261 + - hash: md5 + md5: 4b77ee1ea604cae18f17ca00cdb6988b + path: scripts/sft/fine_tune_llm.py + size: 4578 outs: - - path: - data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json - hash: md5 - md5: 24037233e5abe74fe13f69dd4fc5e26a - size: 1173 - evaluate_llm_as_judge_api_models@en-gpt_4o_mini-open_ai_gpt-4o-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json - out_metric_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o/judge_metrics_997.json - prompt=en + - hash: md5 + md5: 80bceb56982e9bdb8d4b441bf843014f.dir + nfiles: 33 + path: data/experiments/fine-tune/Unsloth-Mistral-Nemo-Instruct-2407/pl-court-instruct/ + size: 1056899473 + summarize_metrics@data/experiments/predict/en-court-instruct: + cmd: PYTHONPATH=. python scripts/sft/summarize_metrics.py --root-dir data/experiments/predict/en-court-instruct deps: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json - hash: md5 - md5: 8f70e2baa0b0ae8a320577f5c8a60011 - size: 679432 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: eb5736f5709f9773acf21bfc28c2e012 + path: scripts/sft/summarize_metrics.py + size: 2975 outs: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/judge_metrics_997.json - hash: md5 - md5: 1ad8736bed0fff4e88a9c32775f370bf - size: 481 - evaluate_llm_as_judge_api_models@en-gpt_4o_mini-open_ai_gpt-4o-mini-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini - answers_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - out_metric_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json - prompt=en + - hash: md5 + md5: 6065f2fbff28ab7439d35ddfe03b1938 + path: data/experiments/predict/en-court-instruct/metrics_judge_summary.md + size: 4857 + - hash: md5 + md5: 1bb66cbd940bd2288f69fbe490465aaa + path: data/experiments/predict/en-court-instruct/metrics_ngram_summary.md + size: 1031 + summarize_metrics@data/experiments/predict/pl-court-instruct: + cmd: PYTHONPATH=. python scripts/sft/summarize_metrics.py --root-dir data/experiments/predict/pl-court-instruct deps: - - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json - hash: md5 - md5: 2a0819011b3eac56e497201a9f67e310 - size: 690306 - - path: scripts/sft/evaluate_llm_as_judge.py - hash: md5 - md5: 79a02fb864cb279f93fc4171043bb31c - size: 2253 + - hash: md5 + md5: eb5736f5709f9773acf21bfc28c2e012 + path: scripts/sft/summarize_metrics.py + size: 2975 outs: - - path: - data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json - hash: md5 - md5: bd272bea099716c0c2e689a2d19c0071 - size: 488 + - hash: md5 + md5: 3a94f7b7932b8404b88df0236a82ca4d + path: data/experiments/predict/pl-court-instruct/metrics_judge_summary.md + size: 14993 + - hash: md5 + md5: 1a3cc443d01b7a5a86657e686f3ea763 + path: data/experiments/predict/pl-court-instruct/metrics_ngram_summary.md + size: 3263 diff --git a/dvc.yaml b/dvc.yaml index a351a8e..4c286f7 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -113,7 +113,7 @@ stages: - data/experiments/fine-tune/${item.model}/${item.dataset}/ ### Prediction ### - predict: + predict_pl: matrix: dataset: - pl-court-instruct @@ -129,6 +129,7 @@ stages: - trurl-13B-academic - qra-13b - Bielik-11B-v2.2-Instruct + - Bielik-11B-v2.2-Instruct-fine-tuned seed: ${seeds} cmd: >- PYTHONPATH=. python scripts/sft/predict.py @@ -189,7 +190,7 @@ stages: - data/experiments/predict/${item.dataset}/open_ai_${item.model}/outputs_${item.seed}.json ### Evaluation ### - evaluate: + evaluate_pl: matrix: dataset: - pl-court-instruct @@ -204,6 +205,8 @@ stages: - Bielik-7B-Instruct-v0.1-fine-tuned - trurl-13B-academic - qra-13b + - Bielik-11B-v2.2-Instruct + - Bielik-11B-v2.2-Instruct-fine-tuned seed: ${seeds} cmd: >- PYTHONPATH=. python scripts/sft/evaluate.py @@ -265,6 +268,8 @@ stages: - Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned - Bielik-7B-Instruct-v0.1 - Bielik-7B-Instruct-v0.1-fine-tuned + - Bielik-11B-v2.2-Instruct + - Bielik-11B-v2.2-Instruct-fine-tuned seed: ${seeds} cmd: >- PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py