Dashboard updates (#11)

* fix Postgres not starting after restarts/crashes * make dashboard nicer for judgements * show only a subset * nbs checkpoints
pwr-ai · May 15, 2024 · d8b178f · d8b178f
1 parent 566593a
commit d8b178f
Show file tree

Hide file tree

Showing 7 changed files with 1,008 additions and 923 deletions.
diff --git a/dashboards/pages/00_🛠️_Extract_Information_from_Judgements.py b/dashboards/pages/00_🛠️_Extract_Information_from_Judgements.py
@@ -38,7 +38,7 @@
 st.header("Schema extraction/definition")
 schema_query = st.text_input(
     "Ask for schema in natural language:",
-    "Extract the date, verdict, and court from  the judgement.",
+    "Potrzebuję informacji takich jak data wydania wyroku, sygnatura wyroku, czy wyrok dotyczy spraw dzieci, czy wyrok dotyczy narkomanii, krótkie podsumowanie wyroku, a takze sedziowie orzekajacy.",
 )
 llm_schema = st.selectbox(
     "Select the LLM model (schema)",

diff --git a/dashboards/pages/02_🔍_Analyse_Extracted_Information.py b/dashboards/pages/02_🔍_Analyse_Extracted_Information.py
@@ -1,3 +1,5 @@
+import io
+
 import pandas as pd
 import streamlit as st
 
@@ -17,16 +19,36 @@ def load_data():
 
 
 df = load_data()
+extracted_keys = [line.split(":")[0] for line in EXAMPLE_SCHEMA.split("\n") if len(line) > 3] + [
+    "signature",
+    "excerpt",
+    "text",
+    "judges",
+    "references",
+]
 
 st.info(
     "We sampled 100 random judgements from the dataset and extracted information from them. Below is the extracted information and the schema (questions) used to extract it."
 )
 
-st.header("Schema:")
-st.write(EXAMPLE_SCHEMA)
+st.text_area(
+    "Example schema for extracted informations: ", value=EXAMPLE_SCHEMA, height=300, disabled=True
+)
 
 st.header("Extracted Information - tabular format")
-st.write(df)
+st.write(df[extracted_keys])
+
+
+output = io.BytesIO()
+with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
+    df.to_excel(writer, sheet_name="Sheet1", index=False)
+output.seek(0)
+st.download_button(
+    label="Download data as Excel",
+    data=output,
+    file_name="judgements.xlsx",
+    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+)
 
 st.header("Analyse Extracted Information")
 
@@ -56,7 +78,9 @@ def load_data():
 
 st.write("We can check the sentences of them")
 
-for _, row in drug_offences_df.iterrows():
+for row_id, row in drug_offences_df.iterrows():
     st.subheader(row["signature"])
-    st.markdown(row["text"])
-    st.markdown("---")  # Add a horizontal line
+    st.info(row["verdict_summary"])
+    if st.toggle(key=row, label="Show judgement's text"):
+        st.markdown(row["text"])
+        st.markdown("---")
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,4 +1,3 @@
-version: "3"
 name: juddges
 
 services:
@@ -51,6 +50,7 @@ services:
       - POSTGRES_DB=${POSTGRES_DB}
     volumes:
       - postgres-juddges-db:/var/lib/postgresql/data
+    restart: always
     networks:
       - juddges-network
 

diff --git a/juddges/prompts/information_extraction.py b/juddges/prompts/information_extraction.py
@@ -40,8 +40,7 @@
 Format response as JSON:
 """
 
-EXAMPLE_SCHEMA = """
-verdict_date: date as ISO 8601
+EXAMPLE_SCHEMA = """verdict_date: date as ISO 8601
 verdict: string, text representing verdict of the judgement
 verdict_summary: string, short summary of the verdict
 verdict_id: string
@@ -52,8 +51,7 @@
 drug_offence: boolean
 child_offence: boolean
 offence_seriousness: boolean
-verdict_tags: List[string]
-"""
+verdict_tags: List[string]"""
 
 
 def prepare_information_extraction_chain_from_user_prompt() -> RunnableSequence:

diff --git a/nbs/Data/00_prepare_instruction_dataset_for_ir.ipynb b/nbs/Data/00_prepare_instruction_dataset_for_ir.ipynb
diff --git a/nbs/Presentations/00_workshop_demo.ipynb b/nbs/Presentations/00_workshop_demo.ipynb
@@ -0,0 +1,61 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Workshop Demo\n",
+    "\n",
+    "> Jak możemy strukturyzować orzeczenia?\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Jakie informacje chcemy/możemy ekstrahować automatycznie z orzeczeń?\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Ogólne\n",
+    "\n",
+    "- Sygnatura sprawy\n",
+    "- Podstawa prawna\n",
+    "- Strony\n",
+    "- Sentencja\n",
+    "- Podsumowanie\n",
+    "- Tagi, etykiety\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Przykłady specyficznych pytań/zagadnień\n",
+    "\n",
+    "- Czy sprawa dotyczy dzieci?\n",
+    "- Czy sprawa dotyczy wolności słowa?\n",
+    "- Czy sprawa dotyczy XXX? - każdego pytania tego typu możemy użyć\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Czego brakuje nam w codziennych zadaniach/pracach?\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/requirements.txt b/requirements.txt
@@ -4,6 +4,7 @@ langchain==0.1.13
 langsmith==0.1.33
 loguru==0.7.2
 mpire==2.10.0
+openpyxl==3.1.2
 pandas==2.2.1
 polars==0.20.15
 pyarrow==15.0.0
@@ -17,6 +18,7 @@ torch==2.2.1
 transformers==4.38.2
 typer==0.9.0
 xmltodict==0.13.0
+xlsxwriter==3.2.0
 
 # dev
 coverage==7.4.3