diff --git a/configs/embedding.yaml b/configs/embedding.yaml index 37afafe..cb416ec 100644 --- a/configs/embedding.yaml +++ b/configs/embedding.yaml @@ -10,6 +10,7 @@ chunk_config: chunk_size: ${embedding_model.max_seq_length} min_split_chars: 10 take_n_first_chunks: 16 + chunk_overlap: 32 batch_size: 64 output_dir: data/embeddings/${dataset.name}/${hydra:runtime.choices.embedding_model}/all_embeddings diff --git a/dvc.lock b/dvc.lock index 7c5fb51..47c5437 100644 --- a/dvc.lock +++ b/dvc.lock @@ -65,29 +65,29 @@ stages: embed@mmlw-roberta-large: cmd: PYTHONPATH=. python scripts/embed/embed_text.py embedding_model=mmlw-roberta-large deps: - - hash: md5 - md5: 22fa56f7d7d5a1c1372a8a8b57b02ba8 - path: configs/embedding.yaml - size: 467 - - hash: md5 + - path: configs/embedding.yaml + hash: md5 + md5: 9a163f8656c6efa150fd7f939bb32e49 + size: 477 + - path: configs/embedding_model/mmlw-roberta-large.yaml + hash: md5 md5: 22f36cfd196c0fdc3cfd8a036d52b606 - path: configs/embedding_model/mmlw-roberta-large.yaml size: 52 - - hash: md5 - md5: 5dd44be2eea852bcce3d0918ff8b97da.dir + - path: data/datasets/pl/raw + hash: md5 + md5: 622ba21868561c26fb6877ad95bfb5c5.dir + size: 10234505621 nfiles: 17 - path: data/datasets/pl/raw - size: 10234880729 - - hash: md5 - md5: a2953ae4974ef96d62063b5c2711e967 - path: scripts/embed/embed_text.py - size: 3549 - outs: - - hash: md5 - md5: 1a086db46b90b0f3c4c66c3ecefe8adb.dir - nfiles: 53 - path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings - size: 24415235644 + - path: scripts/embed/embed_text.py + hash: md5 + md5: d9f127f2e92afa40f23ebcd6cf540cb9 + size: 3743 + outs: + - path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings + hash: md5 + md5: a8a4a370199cce269899df89f4e33fdc.dir + size: 23430894782 + nfiles: 51 evaluate_api_models@en-court-instruct-open_ai_gpt-4o-997: cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json --num-proc=-1 @@ -157,12 +157,14 @@ stages: path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/metrics_997.json size: 305 evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json --num-proc=-1 deps: - hash: md5 md5: 761018c0a306fbee63dad2fbc119110d - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json size: 821683 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -171,15 +173,18 @@ stages: outs: - hash: md5 md5: 265776ba10a7b24b66e6bac1131e0c48 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_42.json size: 149 evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + cmd: PYTHONPATH=. 
python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json --num-proc=-1 deps: - hash: md5 md5: a7361535b440251d6ce6232a15cfcdf2 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json size: 818877 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -188,15 +193,18 @@ stages: outs: - hash: md5 md5: 97fa8dfaa5e57633e8fb6a7d073177f5 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_7312.json size: 147 evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json --num-proc=-1 deps: - hash: md5 md5: 94924275d576271875fecf22c0f9b39e - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json size: 817490 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -205,15 +213,18 @@ stages: outs: - hash: md5 md5: c3552161ec68d8cc6a8e5b75f02e22e2 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_997.json size: 147 evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json --num-proc=-1 deps: - hash: md5 md5: 4246a4fafba5e130aac3db6c1c61ce30 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json size: 675578 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -222,15 +233,18 @@ stages: outs: - hash: md5 md5: 016d1c87b2925c6f941400d178bee018 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_42.json size: 157 evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json + cmd: PYTHONPATH=. 
python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json --num-proc=-1 deps: - hash: md5 md5: f0b806eebca2f3ddf49d0ff821856b45 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json size: 670935 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -239,15 +253,18 @@ stages: outs: - hash: md5 md5: a8459393feb773fea85ede4b831b3fa6 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_7312.json size: 157 evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json --num-proc=-1 deps: - hash: md5 md5: 4e968cac351ad48ad786d1ecccbbc967 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json size: 670674 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -256,15 +273,18 @@ stages: outs: - hash: md5 md5: 21bc79aad7ab2e97b75e1d3fb18a2263 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_997.json size: 157 evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json --num-proc=-1 deps: - hash: md5 md5: 4fe25ad80a20ea5d6200136176b3e4ca - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json size: 705218 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -273,15 +293,18 @@ stages: outs: - hash: md5 md5: 0b2f663a1cbc3ef08c363ec8adc53c15 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_42.json size: 151 evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + cmd: PYTHONPATH=. 
python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json --num-proc=-1 deps: - hash: md5 md5: cf4fdbf0e26e6c793bdca4edd6e365c0 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json size: 703876 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -290,15 +313,18 @@ stages: outs: - hash: md5 md5: 604b5cee14ec6520b88bafecc962e031 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_7312.json size: 152 evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json --num-proc=-1 deps: - hash: md5 md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json size: 705894 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -307,15 +333,18 @@ stages: outs: - hash: md5 md5: a91ec5b434bebd8ce1d2000e0a033cb9 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_997.json size: 152 evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json --num-proc=-1 deps: - hash: md5 md5: 313fa5a662f37cacae4980a04830f422 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json size: 642688 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -324,15 +353,18 @@ stages: outs: - hash: md5 md5: f0d37c5ac017c0e488b7c3bed01c7093 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_42.json size: 156 evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json + cmd: PYTHONPATH=. 
python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json --num-proc=-1 deps: - hash: md5 md5: 4ed8db93aa14f1cc98e276d3989efa9e - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json size: 642730 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -341,15 +373,18 @@ stages: outs: - hash: md5 md5: a1521ab06a56258759953bb02ae87e24 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_7312.json size: 157 evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json + cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json --num-proc=-1 deps: - hash: md5 md5: 787c129090aa1b64e337b236a4391402 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json size: 642477 - hash: md5 md5: 0644efb76af2c5461185e37a07ba2c17 @@ -358,7 +393,8 @@ stages: outs: - hash: md5 md5: f3339245ea358de4b1348c8393153946 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_997.json size: 157 evaluate_llm_as_judge_api_models@en-gpt_4o_mini-open_ai_gpt-4o-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -396,7 +432,8 @@ stages: outs: - hash: md5 md5: bd272bea099716c0c2e689a2d19c0071 - path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json + path: + data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json size: 488 evaluate_llm_as_judge_api_models@pl-gpt_4o_mini-open_ai_gpt-4o-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -434,7 +471,8 @@ stages: outs: - hash: md5 md5: 24037233e5abe74fe13f69dd4fc5e26a - path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json + path: + data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json size: 1173 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-42: cmd: PYTHONPATH=. 
python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -444,7 +482,8 @@ stages: deps: - hash: md5 md5: 761018c0a306fbee63dad2fbc119110d - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json size: 821683 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -453,7 +492,8 @@ stages: outs: - hash: md5 md5: 77ecbff8c82afbfd6fec098fb87e1218 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json size: 478 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-7312: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -463,7 +503,8 @@ stages: deps: - hash: md5 md5: a7361535b440251d6ce6232a15cfcdf2 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json size: 818877 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -472,7 +513,8 @@ stages: outs: - hash: md5 md5: f25c9ad98ef817e976def98d6b7d3b5d - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json size: 482 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -482,7 +524,8 @@ stages: deps: - hash: md5 md5: 94924275d576271875fecf22c0f9b39e - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json size: 817490 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -491,7 +534,8 @@ stages: outs: - hash: md5 md5: 4395c32931d25a1bd9aa092c5a0e5460 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json size: 478 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-42: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -501,7 +545,8 @@ stages: deps: - hash: md5 md5: 4246a4fafba5e130aac3db6c1c61ce30 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json size: 675578 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -510,7 +555,8 @@ stages: outs: - hash: md5 md5: 5f2cea81c873a3b85ef95ba9a6dc90a5 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_42.json size: 487 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-7312: cmd: PYTHONPATH=. 
python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -520,7 +566,8 @@ stages: deps: - hash: md5 md5: f0b806eebca2f3ddf49d0ff821856b45 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json size: 670935 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -529,7 +576,8 @@ stages: outs: - hash: md5 md5: 5cc45cac8a7607e42a8a394593d33396 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_7312.json size: 486 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -539,7 +587,8 @@ stages: deps: - hash: md5 md5: 4e968cac351ad48ad786d1ecccbbc967 - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json size: 670674 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -548,7 +597,8 @@ stages: outs: - hash: md5 md5: 90c2b0cd132130d0b9d3a60bf6fdd69b - path: data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_997.json size: 486 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-42: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -558,7 +608,8 @@ stages: deps: - hash: md5 md5: 4fe25ad80a20ea5d6200136176b3e4ca - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json size: 705218 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -567,7 +618,8 @@ stages: outs: - hash: md5 md5: 69901f631da4ffefd09e7cbfac39cd89 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json size: 480 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-7312: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -577,7 +629,8 @@ stages: deps: - hash: md5 md5: cf4fdbf0e26e6c793bdca4edd6e365c0 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json size: 703876 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -586,7 +639,8 @@ stages: outs: - hash: md5 md5: 860b5c00ace1f2967db9b5a977cfc3ad - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json size: 478 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-997: cmd: PYTHONPATH=. 
python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -596,7 +650,8 @@ stages: deps: - hash: md5 md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json size: 705894 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -605,7 +660,8 @@ stages: outs: - hash: md5 md5: 860b5c00ace1f2967db9b5a977cfc3ad - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json size: 478 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-42: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -615,7 +671,8 @@ stages: deps: - hash: md5 md5: 313fa5a662f37cacae4980a04830f422 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json size: 642688 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -624,7 +681,8 @@ stages: outs: - hash: md5 md5: 974e972a09d844a77840029d642e8077 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_42.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_42.json size: 486 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-7312: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -634,7 +692,8 @@ stages: deps: - hash: md5 md5: 4ed8db93aa14f1cc98e276d3989efa9e - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json size: 642730 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -643,7 +702,8 @@ stages: outs: - hash: md5 md5: 8a9712eb10a8da99d86bab8968fd3207 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_7312.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_7312.json size: 485 evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -653,7 +713,8 @@ stages: deps: - hash: md5 md5: 787c129090aa1b64e337b236a4391402 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json size: 642477 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -662,7 +723,8 @@ stages: outs: - hash: md5 md5: 34de8eabaebe6a96b4b664b664f222e2 - path: data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_997.json + path: + data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_997.json size: 484 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-42: cmd: PYTHONPATH=. 
python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -672,7 +734,8 @@ stages: deps: - hash: md5 md5: c3e404c898e3e193ac3aa910187b4f9f - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json size: 1734129 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -681,7 +744,8 @@ stages: outs: - hash: md5 md5: 198f24599357bc230bf9f1e39a235a44 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_42.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_42.json size: 1172 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-7312: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -691,7 +755,8 @@ stages: deps: - hash: md5 md5: d4a2ab2393a58f0d7e1897859eccb626 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json size: 1734772 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -700,7 +765,8 @@ stages: outs: - hash: md5 md5: 81cfdaa675ef2118cf923e57cc54d201 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_7312.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_7312.json size: 1161 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -710,7 +776,8 @@ stages: deps: - hash: md5 md5: 8f4f6bc97e33b3b2728bebb7620a4968 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json size: 1731689 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -719,7 +786,8 @@ stages: outs: - hash: md5 md5: c5861ffaa439ba9bbd95b954d6ab1f3d - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_997.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_997.json size: 1168 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-fine-tuned-42: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -729,7 +797,8 @@ stages: deps: - hash: md5 md5: dfd5d7389b312686428cc967aea5a5b9 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json size: 1860743 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -738,7 +807,8 @@ stages: outs: - hash: md5 md5: abcd5722e84ec3e81ff8cf28b8a887cb - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_42.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_42.json size: 1165 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-fine-tuned-7312: cmd: PYTHONPATH=. 
python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -748,7 +818,8 @@ stages: deps: - hash: md5 md5: 8fa2faeda5a577c06cd6bf35b8702330 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json size: 1857569 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -757,7 +828,8 @@ stages: outs: - hash: md5 md5: 4b77a3d10cd6027e7e141ba80e9678c2 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_7312.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_7312.json size: 1160 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-fine-tuned-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -767,7 +839,8 @@ stages: deps: - hash: md5 md5: ba53d76f701eddb60a182de49d992878 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json size: 1857855 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -776,7 +849,8 @@ stages: outs: - hash: md5 md5: 9e60a1ed6002a0349656c0bd23bc7b1c - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_997.json + path: + data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_997.json size: 1164 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-42: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -786,7 +860,8 @@ stages: deps: - hash: md5 md5: 2dc39513a04910c5d0c54380166639d9 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json size: 2029644 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -795,7 +870,8 @@ stages: outs: - hash: md5 md5: 243da4df07c6dfb5199b925e3f5c07aa - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_42.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_42.json size: 1137 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-7312: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -805,7 +881,8 @@ stages: deps: - hash: md5 md5: ae39bf31296ffe82c0f6a3e8c9ff63aa - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json size: 2014399 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -814,7 +891,8 @@ stages: outs: - hash: md5 md5: 8098cc937d57455ca47d32c3449159a3 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_7312.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_7312.json size: 1129 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-997: cmd: PYTHONPATH=. 
python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -824,7 +902,8 @@ stages: deps: - hash: md5 md5: fac04d78ad020b50f79fc7277a037e8e - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json size: 2016400 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -833,7 +912,8 @@ stages: outs: - hash: md5 md5: f1390b2d50893a17c90fc277dc363d6a - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_997.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_997.json size: 1139 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-42: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -843,7 +923,8 @@ stages: deps: - hash: md5 md5: 178eb0649617d4a698da6c9e315e84c5 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json size: 2034749 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -852,7 +933,8 @@ stages: outs: - hash: md5 md5: 302b957707520fa327d1da0edf18baa3 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_42.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_42.json size: 1167 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-7312: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -862,7 +944,8 @@ stages: deps: - hash: md5 md5: 743ea22448bc73a7a991da075fca8841 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json size: 2031343 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -871,7 +954,8 @@ stages: outs: - hash: md5 md5: 789f0906846251d3f0cab78d111f9c56 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_7312.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_7312.json size: 1163 evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -881,7 +965,8 @@ stages: deps: - hash: md5 md5: 433a4b2aa7870a134277a265d099a588 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json size: 2029482 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -890,7 +975,8 @@ stages: outs: - hash: md5 md5: 90f3ed04ef29c5cd29b7ec8f02a780a1 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_997.json + path: + data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_997.json size: 1163 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-42: cmd: PYTHONPATH=. 
python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -900,7 +986,8 @@ stages: deps: - hash: md5 md5: e99c88720116c951087b6125e5f4be4d - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json size: 2008073 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -909,7 +996,8 @@ stages: outs: - hash: md5 md5: 9d9fba0cf2169e9dd9f69579a2182b8e - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json size: 1172 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-7312: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -919,7 +1007,8 @@ stages: deps: - hash: md5 md5: 4c25368aacb7402b1b2cae9368d187d1 - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json size: 2013637 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -928,7 +1017,8 @@ stages: outs: - hash: md5 md5: e58171fc082d33c84497a13dabcf766c - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json size: 1167 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -938,7 +1028,8 @@ stages: deps: - hash: md5 md5: baef589507248af212aaae51602fd999 - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json size: 2010150 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -947,7 +1038,8 @@ stages: outs: - hash: md5 md5: f8d16a5298fabe288486822779470cd8 - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json size: 1165 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-42: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -957,7 +1049,8 @@ stages: deps: - hash: md5 md5: 289b719e8c7166e578417e5706bdc4e3 - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_42.json size: 1760355 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -966,7 +1059,8 @@ stages: outs: - hash: md5 md5: 70398042d030309e7e0bc7ba927136f3 - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_42.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_42.json size: 1167 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-7312: cmd: PYTHONPATH=. 
python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -976,7 +1070,8 @@ stages: deps: - hash: md5 md5: 25bee3b4ee09b36d636095b4c927a0d3 - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_7312.json size: 1759194 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -985,7 +1080,8 @@ stages: outs: - hash: md5 md5: 9d22089c8d23bbc5a028c748e5522c23 - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_7312.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_7312.json size: 1157 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -995,7 +1091,8 @@ stages: deps: - hash: md5 md5: 82b2c535d99d91b9a34986375bfa31a9 - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/outputs_997.json size: 1758747 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -1004,7 +1101,8 @@ stages: outs: - hash: md5 md5: 4222d5b165de8a3a89d71d6519b71b76 - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_997.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned/judge_metrics_997.json size: 1170 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-42: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -1014,7 +1112,8 @@ stages: deps: - hash: md5 md5: 1385f49966e9db2a88a17f53d0887ad8 - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json size: 1741944 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -1023,7 +1122,8 @@ stages: outs: - hash: md5 md5: f4bac633a65afde9bf5612f35c3089bb - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json size: 1170 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-7312: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -1033,7 +1133,8 @@ stages: deps: - hash: md5 md5: 924744efce1483e9128579cad7a4454c - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json size: 1748772 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -1042,7 +1143,8 @@ stages: outs: - hash: md5 md5: 1f95777ef87a547fa7a41dc597adfc39 - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json size: 1166 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-997: cmd: PYTHONPATH=. 
python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -1052,7 +1154,8 @@ stages: deps: - hash: md5 md5: 4d023797a9053fd7df61f6b1796112e9 - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json size: 1747404 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -1061,7 +1164,8 @@ stages: outs: - hash: md5 md5: de3f557dfdf3440262e4d8f811e526ca - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json size: 1167 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-42: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -1071,7 +1175,8 @@ stages: deps: - hash: md5 md5: 14d4613f7d9495f5fb5f2d7b81f402a9 - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_42.json size: 1825646 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -1080,7 +1185,8 @@ stages: outs: - hash: md5 md5: e8cff190991ee3164825dbf7eca03d12 - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_42.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_42.json size: 1170 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-7312: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -1090,7 +1196,8 @@ stages: deps: - hash: md5 md5: 302e1dc4f064007e3df88ac1e8acccc5 - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_7312.json size: 1831330 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -1099,7 +1206,8 @@ stages: outs: - hash: md5 md5: aee4a08e0a4d0398b34a2587c039244d - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_7312.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_7312.json size: 1169 evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-997: cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini @@ -1109,7 +1217,8 @@ stages: deps: - hash: md5 md5: 41a47dc56efc29b6c2771db68bdacb17 - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/outputs_997.json size: 1822491 - hash: md5 md5: 79a02fb864cb279f93fc4171043bb31c @@ -1118,231 +1227,12 @@ stages: outs: - hash: md5 md5: aac703269b10c85d1a2b5303c22ca077 - path: data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_997.json + path: + data/experiments/predict/pl-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned/judge_metrics_997.json size: 1168 evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-42: - cmd: PYTHONPATH=. 
python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json - --num-proc=-1 - deps: - - hash: md5 - md5: c3e404c898e3e193ac3aa910187b4f9f - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json - size: 1734129 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: a75ab0f8f8238ab8c86397dd015fd31d - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/metrics_42.json - size: 306 - evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json - --num-proc=-1 - deps: - - hash: md5 - md5: d4a2ab2393a58f0d7e1897859eccb626 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json - size: 1734772 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: d5861dc30fca8f9bd2d311d924b3905d - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/metrics_7312.json - size: 305 - evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json - --num-proc=-1 - deps: - - hash: md5 - md5: 8f4f6bc97e33b3b2728bebb7620a4968 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json - size: 1731689 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: cd6699727392af2d61383b05fa962741 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/metrics_997.json - size: 306 - evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json - --num-proc=-1 - deps: - - hash: md5 - md5: dfd5d7389b312686428cc967aea5a5b9 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json - size: 1860743 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: d1462bb74d1f8790270a5d97c674891c - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/metrics_42.json - size: 304 - evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-fine-tuned-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json - --num-proc=-1 - deps: - - hash: md5 - md5: 8fa2faeda5a577c06cd6bf35b8702330 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json - size: 1857569 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: 5edacea1e40b97765c7eaa7b4991ab16 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/metrics_7312.json - size: 306 - evaluate_pl@pl-court-instruct-Bielik-11B-v2.2-Instruct-fine-tuned-997: - cmd: PYTHONPATH=. 
python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json - --num-proc=-1 - deps: - - hash: md5 - md5: ba53d76f701eddb60a182de49d992878 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json - size: 1857855 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: 84fbcf83da746f9e98f70ab22be6f238 - path: data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/metrics_997.json - size: 304 - evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - --num-proc=-1 - deps: - - hash: md5 - md5: 2dc39513a04910c5d0c54380166639d9 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json - size: 2029644 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: 2cbca38fd0bbdb4df024f76506eeb26c - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_42.json - size: 307 - evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - --num-proc=-1 - deps: - - hash: md5 - md5: ae39bf31296ffe82c0f6a3e8c9ff63aa - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json - size: 2014399 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: ad13d47ca88e721be75c79c225e12ee6 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_7312.json - size: 289 - evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - --num-proc=-1 - deps: - - hash: md5 - md5: fac04d78ad020b50f79fc7277a037e8e - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json - size: 2016400 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: 83fb160145ef5e21b43f7c348658ea02 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/metrics_997.json - size: 327 - evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - --num-proc=-1 - deps: - - hash: md5 - md5: 178eb0649617d4a698da6c9e315e84c5 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json - size: 2034749 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: 99e684c720ca4c4ef6c4276e7d1880ab - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_42.json - size: 305 - evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-7312: - cmd: PYTHONPATH=. 
python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - --num-proc=-1 - deps: - - hash: md5 - md5: 743ea22448bc73a7a991da075fca8841 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json - size: 2031343 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: 07d798079cedf3dc194242d6a1bc3bcd - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_7312.json - size: 306 - evaluate_pl@pl-court-instruct-Bielik-7B-Instruct-v0.1-fine-tuned-997: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - --num-proc=-1 - deps: - - hash: md5 - md5: 433a4b2aa7870a134277a265d099a588 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json - size: 2029482 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: adb7c1e239396bbf6e308f3f1b436099 - path: data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/metrics_997.json - size: 307 - evaluate_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-42: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - --num-proc=-1 - deps: - - hash: md5 - md5: e99c88720116c951087b6125e5f4be4d - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json - size: 2008073 - - hash: md5 - md5: 0644efb76af2c5461185e37a07ba2c17 - path: scripts/sft/evaluate.py - size: 697 - outs: - - hash: md5 - md5: 2116481b79c785f94b35852b6e0e4f57 - path: data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_42.json - size: 304 - evaluate_pl@pl-court-instruct-Unsloth-Llama-3-8B-Instruct-7312: - cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json + cmd: PYTHONPATH=. 
python scripts/sft/evaluate.py --output-file
+      data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json
       --num-proc=-1
     deps:
     - hash: md5
diff --git a/juddges/_modidx.py b/juddges/_modidx.py
index 4e513c7..c3de275 100644
--- a/juddges/_modidx.py
+++ b/juddges/_modidx.py
@@ -11,6 +11,7 @@
             'juddges.data.datasets.utils': {},
             'juddges.data.pl_court_api': {},
             'juddges.data.pl_court_graph': {},
+            'juddges.data.weaviate_db': {},
             'juddges.evaluation.eval_full_text': {},
             'juddges.evaluation.eval_structured': {},
             'juddges.evaluation.eval_structured_llm_judge': {},
diff --git a/juddges/data/weaviate_db.py b/juddges/data/weaviate_db.py
new file mode 100644
index 0000000..e1dd412
--- /dev/null
+++ b/juddges/data/weaviate_db.py
@@ -0,0 +1,112 @@
+import re
+from abc import ABC, abstractmethod
+from typing import Any, ClassVar
+
+import weaviate
+import weaviate.classes.config as wvcc
+from weaviate.auth import Auth, _APIKey
+
+
+class WeaviateDatabase(ABC):
+    def __init__(self, host: str, port: str, grpc_port: str, api_key: str | None):
+        self.host = host
+        self.port = port
+        self.grpc_port = grpc_port
+        self.__api_key = api_key
+
+        self.client: weaviate.WeaviateClient
+
+    def __enter__(self) -> "WeaviateDatabase":
+        self.client = weaviate.connect_to_local(
+            host=self.host,
+            port=self.port,
+            grpc_port=self.grpc_port,
+            auth_credentials=self.api_key,
+        )
+        self.create_collections()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        if hasattr(self, "client"):
+            self.client.close()
+
+    def __del__(self) -> None:
+        self.__exit__(None, None, None)
+
+    @property
+    def api_key(self) -> _APIKey | None:
+        if self.__api_key is not None:
+            return Auth.api_key(self.__api_key)
+        return None
+
+    @abstractmethod
+    def create_collections(self) -> None:
+        pass
+
+    def insert_batch(
+        self,
+        collection: weaviate.collections.Collection,
+        objects: list[dict[str, Any]],
+    ) -> None:
+        with collection.batch.dynamic() as wv_batch:
+            for obj in objects:
+                wv_batch.add_object(**obj)
+                if wv_batch.number_errors > 0:
+                    break
+        if wv_batch.number_errors > 0:
+            errors = [err.message for err in collection.batch.results.objs.errors.values()]
+            raise ValueError(f"Error ingesting batch: {errors}")
+
+    def get_uuids(self, collection: weaviate.collections.Collection) -> list[str]:
+        return [str(obj.uuid) for obj in collection.iterator(return_properties=[])]
+
+    def _safe_create_collection(self, *args: Any, **kwargs: Any) -> None:
+        try:
+            self.client.collections.create(*args, **kwargs)
+        except weaviate.exceptions.UnexpectedStatusCodeError as err:
+            if (
+                re.search(r"class name (\w+?) already exists", err.message)
+                and err.status_code == 422
+            ):
+                pass
+            else:
+                raise
+
+
+class WeaviateJudgementsDatabase(WeaviateDatabase):
+    JUDGMENTS_COLLECTION: ClassVar[str] = "judgements"
+    JUDGMENT_CHUNKS_COLLECTION: ClassVar[str] = "judgement_chunks"
+
+    @property
+    def judgements_collection(self) -> weaviate.collections.Collection:
+        return self.client.collections.get(self.JUDGMENTS_COLLECTION)
+
+    @property
+    def judgement_chunks_collection(self) -> weaviate.collections.Collection:
+        return self.client.collections.get(self.JUDGMENT_CHUNKS_COLLECTION)
+
+    def create_collections(self) -> None:
+        self._safe_create_collection(
+            name=self.JUDGMENTS_COLLECTION,
+            properties=[
+                wvcc.Property(name="judgement_id", data_type=wvcc.DataType.TEXT),
+            ],
+        )
+        self._safe_create_collection(
+            name=self.JUDGMENT_CHUNKS_COLLECTION,
+            properties=[
+                wvcc.Property(name="chunk_id", data_type=wvcc.DataType.INT),
+                wvcc.Property(name="chunk_text", data_type=wvcc.DataType.TEXT),
+            ],
+            vectorizer_config=wvcc.Configure.Vectorizer.text2vec_transformers(),
+            references=[
+                wvcc.ReferenceProperty(
+                    name="judgementChunk",
+                    target_collection=self.JUDGMENTS_COLLECTION,
+                )
+            ],
+        )
+
+    @staticmethod
+    def uuid_from_judgement_chunk_id(judgement_id: str, chunk_id: int) -> str:
+        return weaviate.util.generate_uuid5(f"{judgement_id}_chunk_{chunk_id}")
diff --git a/juddges/preprocessing/text_chunker.py b/juddges/preprocessing/text_chunker.py
index 25ec80f..c7fb74e 100644
--- a/juddges/preprocessing/text_chunker.py
+++ b/juddges/preprocessing/text_chunker.py
@@ -8,6 +8,7 @@ class TextSplitter:
     def __init__(
         self,
         chunk_size: int,
+        chunk_overlap: int | None = None,
         min_split_chars: int | None = None,
         take_n_first_chunks: int | None = None,
         tokenizer: PreTrainedTokenizer | None = None,
@@ -16,6 +17,7 @@ def __init__(
             self.splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
                 tokenizer,
                 chunk_size=chunk_size,
+                chunk_overlap=chunk_overlap,
             )
         else:
             self.splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size)
diff --git a/requirements.txt b/requirements.txt
index c1ba949..48fc5e4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -33,6 +33,7 @@
 transformers==4.42.3
 trl==0.9.4
 typer==0.9.0
 wandb==0.16.5
+weaviate-client==4.8.1
 xmltodict==0.13.0
 xlsxwriter==3.2.0
diff --git a/scripts/embed/embed_text.py b/scripts/embed/embed_text.py
index 54bba5c..6c24f11 100644
--- a/scripts/embed/embed_text.py
+++ b/scripts/embed/embed_text.py
@@ -10,6 +10,7 @@
 from omegaconf import DictConfig
 from openai import BaseModel
 from sentence_transformers import SentenceTransformer
+from transformers import PreTrainedTokenizer
 from transformers.utils import is_flash_attn_2_available
 
 from juddges.config import EmbeddingModelConfig, RawDatasetConfig
@@ -21,6 +22,7 @@
 NUM_PROC = int(os.getenv("NUM_PROC", 1))
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+os.environ["TOKENIZERS_PARALLELISM"] = "false" if (NUM_PROC > 1) else "true"
 
 
 class EmbeddingConfig(BaseModel, extra="forbid"):
@@ -51,12 +53,6 @@ def main(cfg: DictConfig) -> None:
     )
     ds = ds.filter(lambda item: item["text"] is not None)
 
-    if config.chunk_config is not None:
-        ds = chunk_dataset(ds, config)
-        text_column = "text_chunk"
-    else:
-        text_column = "text"
-
     model = SentenceTransformer(
         config.embedding_model.name,
         device=DEVICE,
     )
     model.compile()
+    if config.chunk_config is not None:
+        ds = chunk_dataset(dataset=ds, config=config, tokenizer=model.tokenizer)
+        text_column = "text_chunk"
+ else: + text_column = "text" + if config.truncation_tokens is not None: assert config.truncation_tokens <= config.embedding_model.max_seq_length model.max_seq_length = config.truncation_tokens @@ -74,19 +76,22 @@ def main(cfg: DictConfig) -> None: batched=True, batch_size=config.batch_size, num_proc=None, - remove_columns=[text_column], desc="Embedding chunks", ) - ds.save_to_disk(config.output_dir) + ds.save_to_disk(str(config.output_dir)) with open(config.output_dir / "config.yaml", "w") as f: yaml.dump(config.model_dump(), f) -def chunk_dataset(dataset: Dataset, config: EmbeddingConfig) -> Dataset: +def chunk_dataset( + dataset: Dataset, + config: EmbeddingConfig, + tokenizer: PreTrainedTokenizer | None = None, +) -> Dataset: # todo: To be verified assert config.chunk_config is not None - split_worker = TextSplitter(**config.chunk_config) + split_worker = TextSplitter(**config.chunk_config, tokenizer=tokenizer) ds = dataset.select_columns(["_id", "text"]).map( split_worker, batched=True, diff --git a/scripts/embed/ingest.py b/scripts/embed/ingest_mongodb.py similarity index 100% rename from scripts/embed/ingest.py rename to scripts/embed/ingest_mongodb.py diff --git a/scripts/embed/ingest_weaviate.py b/scripts/embed/ingest_weaviate.py new file mode 100644 index 0000000..99d1a0d --- /dev/null +++ b/scripts/embed/ingest_weaviate.py @@ -0,0 +1,79 @@ +import math +import os +from pathlib import Path + +import typer +from datasets import load_dataset +from dotenv import load_dotenv +from loguru import logger +from tqdm.auto import tqdm + +from juddges.data.weaviate_db import WeaviateJudgementsDatabase +from weaviate.util import generate_uuid5 + +load_dotenv() +WV_HOST = os.getenv("WV_HOST", "localhost") +WV_PORT = os.getenv("WV_PORT", "8080") +WV_GRPC_PORT = os.getenv("WV_GRPC_PORT", "50051") +WV_API_KEY = os.getenv("WV_API_KEY", None) + +BATCH_SIZE = 64 +NUM_PROC = int(os.getenv("NUM_PROC", 1)) + +logger.info(f"Connecting to Weaviate at {WV_HOST}:{WV_PORT} (gRPC: {WV_GRPC_PORT})") + + +def main( + embeddings_dir: Path = typer.Option(...), + batch_size: int = typer.Option(BATCH_SIZE), + upsert: bool = typer.Option(False), +) -> None: + logger.warning( + "The script will upload local embeddings to the database, " + "make sure they are the same as in the inference module of the database." 
+    )
+    embs = load_dataset(str(embeddings_dir))["train"]
+    embs = embs.map(
+        lambda item: {
+            "uuid": WeaviateJudgementsDatabase.uuid_from_judgement_chunk_id(
+                judgement_id=item["_id"], chunk_id=item["chunk_id"]
+            )
+        },
+        num_proc=NUM_PROC,
+        desc="Generating UUIDs",
+    )
+    with WeaviateJudgementsDatabase(WV_HOST, WV_PORT, WV_GRPC_PORT, WV_API_KEY) as db:
+        if not upsert:
+            logger.info("upsert disabled - uploading only new embeddings")
+            uuids = set(db.get_uuids(db.judgement_chunks_collection))
+            embs = embs.filter(lambda item: item["uuid"] not in uuids)
+        else:
+            logger.info(
+                "upsert enabled - uploading all embeddings (automatically updating already uploaded)"
+            )
+
+        for batch in tqdm(
+            embs.iter(batch_size=batch_size),
+            total=math.ceil(len(embs) / batch_size),
+            desc="Uploading batches",
+        ):
+            objects = [
+                {
+                    "properties": {
+                        "judgment_id": batch["_id"][i],
+                        "chunk_id": batch["chunk_id"][i],
+                        "chunk_text": batch["text_chunk"][i],
+                    },
+                    "uuid": generate_uuid5(f"{batch['_id'][i]}_chunk_{batch['chunk_id'][i]}"),
+                    "vector": batch["embedding"][i],
+                }
+                for i in range(len(batch["_id"]))
+            ]
+            db.insert_batch(
+                collection=db.judgement_chunks_collection,
+                objects=objects,
+            )
+
+
+if __name__ == "__main__":
+    typer.run(main)
diff --git a/scripts/embed/weaviate_example.py b/scripts/embed/weaviate_example.py
new file mode 100644
index 0000000..03dcb56
--- /dev/null
+++ b/scripts/embed/weaviate_example.py
@@ -0,0 +1,38 @@
+import os
+from pprint import pprint
+
+from dotenv import load_dotenv
+
+import weaviate
+from weaviate.collections.classes.grpc import MetadataQuery
+
+load_dotenv()
+WV_HOST = os.getenv("WV_HOST", "localhost")
+WV_PORT = int(os.getenv("WV_PORT", 8080))
+WV_GRPC_PORT = int(os.getenv("WV_GRPC_PORT", 50051))
+WV_API_KEY = os.getenv("WV_API_KEY", None)
+
+QUERY_PROMPT = "zapytanie: {query}"
+
+# NOTE: This is a standalone example; for convenience you can use juddges/data/weaviate_db.py
+with weaviate.connect_to_local(
+    host=WV_HOST,
+    port=WV_PORT,
+    grpc_port=WV_GRPC_PORT,
+    auth_credentials=weaviate.auth.Auth.api_key(WV_API_KEY),
+) as client:
+    coll = client.collections.get("judgement_chunks")
+    response = coll.query.hybrid(
+        query=QUERY_PROMPT.format(query="oskarżony handlował narkotykami"),
+        limit=2,
+        return_metadata=MetadataQuery(distance=True),
+    )
+
+for o in response.objects:
+    print(
+        f"{o.properties['judgment_id']} - {o.properties['chunk_id']}".center(
+            100,
+            "=",
+        )
+    )
+    pprint(o.properties["chunk_text"])
diff --git a/weaviate/README.md b/weaviate/README.md
new file mode 100644
index 0000000..add67b5
--- /dev/null
+++ b/weaviate/README.md
@@ -0,0 +1,16 @@
+# Weaviate deployment
+
+## Instructions
+1. Prepare a `.env` file with your user names and API tokens
+   ```bash
+   cp example.env .env
+   ```
+2. Run the containers with Docker Compose
+   ```bash
+   docker compose up -d
+   ```
+
+## Remarks
+* Persistent data is stored in the mounted `./weaviate_data` directory
+* The deployment was tested on a machine with 16 CPUs, 64 GB of memory, and no GPU (vectors were computed outside the Weaviate instance; `t2v-transformers` is used only for inference)
+* See [scripts/embed/weaviate_example.py](../scripts/embed/weaviate_example.py) for an example search query
diff --git a/weaviate/docker-compose.yaml b/weaviate/docker-compose.yaml
new file mode 100644
index 0000000..e7c0bb8
--- /dev/null
+++ b/weaviate/docker-compose.yaml
@@ -0,0 +1,33 @@
+name: weaviate
+services:
+  weaviate:
+    command:
+    - --host
+    - 0.0.0.0
+    - --port
+    - '8080'
+    - --scheme
+    - http
+    image: cr.weaviate.io/semitechnologies/weaviate:1.26.4
+    depends_on:
+    - t2v-transformers
+    ports:
+    - 8080:8080
+    - 50051:50051
+    volumes:
+    - ./weaviate_data:/var/lib/weaviate
+    restart: on-failure:0
+    env_file:
+    - path: .env
+      required: true
+    cpu_count: 14
+    mem_limit: 60g
+
+  t2v-transformers:
+    build:
+      context: .
+      dockerfile: hf_transformers.dockerfile
+      args:
+      - MODEL_NAME=sdadas/mmlw-roberta-large
+    environment:
+      ENABLE_CUDA: 0 # Set to 1 to enable
diff --git a/weaviate/example.env b/weaviate/example.env
new file mode 100644
index 0000000..5b4fa47
--- /dev/null
+++ b/weaviate/example.env
@@ -0,0 +1,15 @@
+QUERY_DEFAULTS_LIMIT=25
+PERSISTENCE_DATA_PATH='/var/lib/weaviate'
+DEFAULT_VECTORIZER_MODULE='none'
+ENABLE_MODULES=''
+CLUSTER_HOSTNAME='juddges-vm'
+
+# vectorization
+ENABLE_MODULES='text2vec-transformers'
+TRANSFORMERS_INFERENCE_API=http://t2v-transformers:8080
+
+# authentication
+AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='false'
+AUTHENTICATION_APIKEY_ENABLED='true'
+AUTHENTICATION_APIKEY_ALLOWED_KEYS=','
+AUTHENTICATION_APIKEY_USERS=','
diff --git a/weaviate/hf_transformers.dockerfile b/weaviate/hf_transformers.dockerfile
new file mode 100644
index 0000000..5bb8916
--- /dev/null
+++ b/weaviate/hf_transformers.dockerfile
@@ -0,0 +1,3 @@
+FROM semitechnologies/transformers-inference:custom
+ARG MODEL_NAME
+RUN ./download.py
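For a quick end-to-end check after ingestion, here is a minimal query sketch (not part of the diff) that goes through the `WeaviateJudgementsDatabase` helper introduced above instead of the raw client used in `scripts/embed/weaviate_example.py`. It assumes the compose stack is running and that `WV_HOST`, `WV_PORT`, `WV_GRPC_PORT`, and `WV_API_KEY` are set in the environment; the query text is purely illustrative.

```python
import os

from dotenv import load_dotenv

from juddges.data.weaviate_db import WeaviateJudgementsDatabase

load_dotenv()

# Connect through the helper class; entering the context manager also
# creates the collections if they do not exist yet.
with WeaviateJudgementsDatabase(
    host=os.getenv("WV_HOST", "localhost"),
    port=os.getenv("WV_PORT", "8080"),
    grpc_port=os.getenv("WV_GRPC_PORT", "50051"),
    api_key=os.getenv("WV_API_KEY"),
) as db:
    # The chunk collection is configured with text2vec-transformers,
    # so the query string is vectorized server-side for the hybrid search.
    response = db.judgement_chunks_collection.query.hybrid(
        query="zapytanie: oskarżony handlował narkotykami",
        limit=2,
    )

for obj in response.objects:
    print(obj.properties["judgment_id"], obj.properties["chunk_id"])
    print(obj.properties["chunk_text"][:200])
```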