Skip to content

Commit

Permalink
Dashboard updates (#11)
Browse files Browse the repository at this point in the history
* fix postgres not starting after restarts/crashes

* make dashboard nicer for judgements

* show only subset

* nbs checkpoints
  • Loading branch information
laugustyniak authored May 15, 2024
1 parent 566593a commit d8b178f
Show file tree
Hide file tree
Showing 7 changed files with 1,008 additions and 923 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
st.header("Schema extraction/definition")
schema_query = st.text_input(
"Ask for schema in natural language:",
"Extract the date, verdict, and court from the judgement.",
"Potrzebuję informacji takich jak data wydania wyroku, sygnatura wyroku, czy wyrok dotyczy spraw dzieci, czy wyrok dotyczy narkomanii, krótkie podsumowanie wyroku, a takze sedziowie orzekajacy.",
)
llm_schema = st.selectbox(
"Select the LLM model (schema)",
Expand Down
36 changes: 30 additions & 6 deletions dashboards/pages/02_🔍_Analyse_Extracted_Information.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import io

import pandas as pd
import streamlit as st

Expand All @@ -17,16 +19,36 @@ def load_data():


df = load_data()
extracted_keys = [line.split(":")[0] for line in EXAMPLE_SCHEMA.split("\n") if len(line) > 3] + [
"signature",
"excerpt",
"text",
"judges",
"references",
]

st.info(
"We sampled 100 random judgements from the dataset and extracted information from them. Below is the extracted information and the schema (questions) used to extract it."
)

st.header("Schema:")
st.write(EXAMPLE_SCHEMA)
st.text_area(
"Example schema for extracted informations: ", value=EXAMPLE_SCHEMA, height=300, disabled=True
)

st.header("Extracted Information - tabular format")
st.write(df)
st.write(df[extracted_keys])


output = io.BytesIO()
with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
df.to_excel(writer, sheet_name="Sheet1", index=False)
output.seek(0)
st.download_button(
label="Download data as Excel",
data=output,
file_name="judgements.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)

st.header("Analyse Extracted Information")

Expand Down Expand Up @@ -56,7 +78,9 @@ def load_data():

st.write("We can check the sentences of them")

for _, row in drug_offences_df.iterrows():
for row_id, row in drug_offences_df.iterrows():
st.subheader(row["signature"])
st.markdown(row["text"])
st.markdown("---") # Add a horizontal line
st.info(row["verdict_summary"])
if st.toggle(key=row, label="Show judgement's text"):
st.markdown(row["text"])
st.markdown("---")
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: "3"
name: juddges

services:
Expand Down Expand Up @@ -51,6 +50,7 @@ services:
- POSTGRES_DB=${POSTGRES_DB}
volumes:
- postgres-juddges-db:/var/lib/postgresql/data
restart: always
networks:
- juddges-network

Expand Down
6 changes: 2 additions & 4 deletions juddges/prompts/information_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@
Format response as JSON:
"""

EXAMPLE_SCHEMA = """
verdict_date: date as ISO 8601
EXAMPLE_SCHEMA = """verdict_date: date as ISO 8601
verdict: string, text representing verdict of the judgement
verdict_summary: string, short summary of the verdict
verdict_id: string
Expand All @@ -52,8 +51,7 @@
drug_offence: boolean
child_offence: boolean
offence_seriousness: boolean
verdict_tags: List[string]
"""
verdict_tags: List[string]"""


def prepare_information_extraction_chain_from_user_prompt() -> RunnableSequence:
Expand Down
1,822 changes: 911 additions & 911 deletions nbs/Data/00_prepare_instruction_dataset_for_ir.ipynb

Large diffs are not rendered by default.

61 changes: 61 additions & 0 deletions nbs/Presentations/00_workshop_demo.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Workshop Demo\n",
"\n",
"> Jak możemy strukturyzować orzeczenia?\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Jakie informacje chcemy/możemy ekstrahować automatycznie z orzeczeń?\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Ogólne\n",
"\n",
"- Sygnatura sprawy\n",
"- Podstawa prawna\n",
"- Strony\n",
"- Sentencja\n",
"- Podsumowanie\n",
"- Tagi, etykiety\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Przykłady specyficznych pytań/zagadnień\n",
"\n",
"- Czy sprawa dotyczy dzieci?\n",
"- Czy sprawa dotyczy wolności słowa?\n",
"- Czy sprawa dotyczy XXX? - każdego tego typu pytania możemy użyć\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Czego brakuje nam w codziennych zadaniach/pracach?\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "python3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ langchain==0.1.13
langsmith==0.1.33
loguru==0.7.2
mpire==2.10.0
openpyxl==3.1.2
pandas==2.2.1
polars==0.20.15
pyarrow==15.0.0
Expand All @@ -17,6 +18,7 @@ torch==2.2.1
transformers==4.38.2
typer==0.9.0
xmltodict==0.13.0
xlsxwriter==3.2.0

# dev
coverage==7.4.3
Expand Down

0 comments on commit d8b178f

Please sign in to comment.