Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add llm-as-judge #27

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/nbdev-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on: [workflow_dispatch, pull_request]
jobs:
nbdev-test:
runs-on: ubuntu-latest
steps:
steps:
- uses: fastai/workflows/nbdev-ci@master
with:
skip_test: true
38 changes: 27 additions & 11 deletions .github/workflows/python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,26 @@ jobs:
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5

- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- uses: actions/cache@v3
cache: 'pip'
cache-dependency-path: |
requirements.txt

- name: Cache pre-commit
uses: actions/cache@v4
with:
path: ${{ env.pythonLocation }}
key: ${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}
path: ~/.cache/pre-commit
key: pre-commit|${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('.pre-commit-config.yaml') }}

- name: Install deps
run: make install_cpu
run: |
python -m pip install --upgrade pip
make install_cpu

- name: Lint
run: make check

Expand All @@ -40,15 +51,20 @@ jobs:
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5

- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- uses: actions/cache@v3
with:
path: ${{ env.pythonLocation }}
key: ${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}
cache: 'pip'
cache-dependency-path: |
requirements.txt

- name: Install deps
run: make install_cpu
run: |
python -m pip install --upgrade pip
make install_cpu

- name: Test
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
Expand Down
29 changes: 29 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: no-commit-to-branch
name: No commits to master
- id: end-of-file-fixer
name: End-of-file fixer
- name: mixed-line-ending
id: mixed-line-ending
args: [--fix, lf]
- id: trailing-whitespace
name: Remove trailing whitespaces
- id: check-toml
name: Check toml
- id: check-yaml
name: Check yaml
args: [--allow-multiple-documents]


- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.10
hooks:
- id: ruff
name: Ruff Linter
args: [--fix, --exit-non-zero-on-fix, juddges, scripts, dashboards, tests]
- id: ruff-format
name: Ruff Formatter
args: [juddges, scripts, dashboards, tests]
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ RUN apt-get update \
&& gdebi -n quarto-1.5.17-linux-amd64.deb \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists \
&& rm -rf /tmp
&& rm -rf /tmp

ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONUNBUFFERED 1
Expand Down
9 changes: 3 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@ lint_dirs := juddges scripts dashboards tests
mypy_dirs := juddges scripts dashboards tests

fix:
ruff check $(lint_dirs) --fix
ruff format $(lint_dirs)
pre-commit run --all-files

check:
ruff check $(lint_dirs)
ruff format $(lint_dirs) --check
pre-commit run --all-files

check-types:
mypy --install-types --non-interactive $(mypy_dirs)
Expand All @@ -25,8 +23,7 @@ install:
install_cpu:
pip install --find-links https://download.pytorch.org/whl/cpu -r requirements.txt

# unsloth requires python 3.10
# requires conda environment
# unsloth requires python 3.10 and conda environment
install_unsloth:
conda install --yes pytorch-cuda=12.1 pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers
pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
Expand Down
10 changes: 5 additions & 5 deletions configs/embedding.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ defaults:
- embedding_model: ???
- dataset: pl-court-raw
- _self_
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled

length_adjust_mode: chunk
chunk_config:
Expand All @@ -14,7 +14,7 @@ batch_size: 64

output_dir: data/embeddings/${dataset.name}/${hydra:runtime.choices.embedding_model}/all_embeddings

hydra:
output_subdir: null
run:
hydra:
output_subdir: null
run:
dir: .
10 changes: 5 additions & 5 deletions configs/fine_tuning.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ defaults:
- model: ???
- dataset: pl-court-instruct
- _self_
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled

output_dir: data/experiments/fine-tune/${hydra:runtime.choices.model}/${hydra:runtime.choices.dataset}
run_name: ${hydra:runtime.choices.model}_${hydra:runtime.choices.dataset}_fine_tune
Expand All @@ -17,7 +17,7 @@ truncate_context: True
epochs: 1
batch_size: 4

hydra:
output_subdir: null
run:
hydra:
output_subdir: null
run:
dir: .
11 changes: 11 additions & 0 deletions configs/llm_judge.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
defaults:
- model: ???
- _self_

answers_file: ???
out_metric_file: ???
out_predictions_file: ???

generate_kwargs:
max_new_tokens: 20
do_sample: False
14 changes: 8 additions & 6 deletions configs/predict.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@ defaults:
- model: ???
- dataset: pl-court-instruct
- _self_
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled

device_map: 'auto'
output_file: data/experiments/predict/${hydra:runtime.choices.dataset}/outputs_${hydra:runtime.choices.model}.json
metrics_file: data/experiments/predict/${hydra:runtime.choices.dataset}/metrics_${hydra:runtime.choices.model}.json

max_new_tokens: 250
truncate_context: True
generate_kwargs:
max_new_tokens: 250
do_sample: False

hydra:
output_subdir: null
run:
hydra:
output_subdir: null
run:
dir: .
81 changes: 41 additions & 40 deletions dashboards/pages/01_🔍_Search_Judgements.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,41 @@
from typing import Any
import streamlit as st

from juddges.data.datasets import get_mongo_collection
from pymongo.collection import Collection

TITLE = "Search for Judgements"

st.set_page_config(page_title=TITLE, page_icon="⚖️", layout="wide")

st.title(TITLE)


@st.cache_resource
def get_judgements_collection() -> Collection:
return get_mongo_collection("judgements")


judgements_collection = get_judgements_collection()


def search_data(query: str, max_judgements: int = 5) -> list[dict[str, Any]]:
items = list(judgements_collection.find({"$text": {"$search": query}}).limit(max_judgements))
return items


with st.form(key="search_form"):
text = st.text_area("What you are looking for in the judgements?")
max_judgements = st.slider("Max judgements to show", min_value=1, max_value=20, value=5)
submit_button = st.form_submit_button(label="Search")

if submit_button:
with st.spinner("Searching..."):
items = search_data(text, max_judgements)

st.header("Judgements - Results")
for item in items:
st.header(item["signature"])
st.subheader(item["publicationDate"])
st.write(item["text"])
from typing import Any

import streamlit as st
from pymongo.collection import Collection

from juddges.data.datasets import get_mongo_collection

# Page title, reused for both the browser tab and the on-page heading.
TITLE = "Search for Judgements"

# Must be the first Streamlit call in the script (Streamlit requirement).
st.set_page_config(page_title=TITLE, page_icon="⚖️", layout="wide")

st.title(TITLE)


@st.cache_resource
def get_judgements_collection() -> Collection:
    """Return the MongoDB collection storing judgements.

    Wrapped in ``st.cache_resource`` so the collection handle (and its
    underlying client connection) is created once and reused across
    script reruns instead of reconnecting on every interaction.
    """
    collection = get_mongo_collection("judgements")
    return collection


# Module-level handle shared by the search helper below; obtained through the
# cached factory so repeated script reruns reuse the same connection.
judgements_collection = get_judgements_collection()


def search_data(query: str, max_judgements: int = 5) -> list[dict[str, Any]]:
    """Full-text search over the judgements collection.

    Runs a MongoDB ``$text`` search for ``query`` and returns at most
    ``max_judgements`` matching documents as plain dicts.
    """
    cursor = judgements_collection.find({"$text": {"$search": query}})
    return list(cursor.limit(max_judgements))


# Search UI. Streamlit re-executes this script top-to-bottom on every
# interaction; `submit_button` is True only on the rerun triggered by a click.
with st.form(key="search_form"):
    text = st.text_area("What you are looking for in the judgements?")
    max_judgements = st.slider("Max judgements to show", min_value=1, max_value=20, value=5)
    submit_button = st.form_submit_button(label="Search")

if submit_button:
    with st.spinner("Searching..."):
        items = search_data(text, max_judgements)

    # Render each hit. Assumes every document carries "signature",
    # "publicationDate" and "text" fields — NOTE(review): a document missing
    # any of these would raise KeyError here; confirm against the collection schema.
    st.header("Judgements - Results")
    for item in items:
        st.header(item["signature"])
        st.subheader(item["publicationDate"])
        st.write(item["text"])
Loading
Loading