-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
32 changed files
with
711 additions
and
691 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,40 +1,40 @@ | ||
from typing import Any | ||
import streamlit as st | ||
|
||
from juddges.data.datasets import get_mongo_collection | ||
from pymongo.collection import Collection | ||
|
||
TITLE = "Search for Judgements" | ||
|
||
st.set_page_config(page_title=TITLE, page_icon="⚖️", layout="wide") | ||
|
||
st.title(TITLE) | ||
|
||
|
||
@st.cache_resource
def get_judgements_collection() -> Collection:
    """Return the MongoDB ``judgements`` collection.

    Decorated with ``st.cache_resource`` so the collection handle is obtained
    once and reused on subsequent Streamlit reruns.
    """
    collection_name = "judgements"
    return get_mongo_collection(collection_name)
|
||
|
||
judgements_collection = get_judgements_collection() | ||
|
||
|
||
def search_data(query: str, max_judgements: int = 5) -> list[dict[str, Any]]:
    """Run a MongoDB full-text search over the judgements collection.

    Args:
        query: Free-text phrase passed to the ``$text`` / ``$search`` operator.
        max_judgements: Maximum number of documents to return.

    Returns:
        Matching documents, most relevant first. Each document additionally
        carries a ``score`` field holding its text-search relevance score.
        An empty list is returned for a blank query or a non-positive limit.
    """
    # A blank query carries no search terms, so skip the database round trip.
    if not query or not query.strip():
        return []
    # ``limit(0)`` means "no limit" in MongoDB; never pull the whole collection.
    if max_judgements < 1:
        return []

    relevance = {"$meta": "textScore"}
    cursor = (
        judgements_collection.find({"$text": {"$search": query}}, {"score": relevance})
        # $text matches are unordered by default; sort so the limit keeps the best hits.
        .sort([("score", relevance)])
        .limit(max_judgements)
    )
    return list(cursor)
|
||
|
||
with st.form(key="search_form"): | ||
text = st.text_area("What you are looking for in the judgements?") | ||
max_judgements = st.slider("Max judgements to show", min_value=1, max_value=20, value=5) | ||
submit_button = st.form_submit_button(label="Search") | ||
|
||
if submit_button: | ||
with st.spinner("Searching..."): | ||
items = search_data(text, max_judgements) | ||
|
||
st.header("Judgements - Results") | ||
for item in items: | ||
st.header(item["signature"]) | ||
st.subheader(item["publicationDate"]) | ||
st.write(item["text"]) | ||
from typing import Any | ||
import streamlit as st | ||
|
||
from juddges.data.datasets import get_mongo_collection | ||
from pymongo.collection import Collection | ||
|
||
TITLE = "Search for Judgements" | ||
|
||
st.set_page_config(page_title=TITLE, page_icon="⚖️", layout="wide") | ||
|
||
st.title(TITLE) | ||
|
||
|
||
@st.cache_resource
def get_judgements_collection() -> Collection:
    """Return the MongoDB ``judgements`` collection.

    Decorated with ``st.cache_resource`` so the collection handle is obtained
    once and reused on subsequent Streamlit reruns.
    """
    collection_name = "judgements"
    return get_mongo_collection(collection_name)
|
||
|
||
judgements_collection = get_judgements_collection() | ||
|
||
|
||
def search_data(query: str, max_judgements: int = 5) -> list[dict[str, Any]]:
    """Run a MongoDB full-text search over the judgements collection.

    Args:
        query: Free-text phrase passed to the ``$text`` / ``$search`` operator.
        max_judgements: Maximum number of documents to return.

    Returns:
        Matching documents, most relevant first. Each document additionally
        carries a ``score`` field holding its text-search relevance score.
        An empty list is returned for a blank query or a non-positive limit.
    """
    # A blank query carries no search terms, so skip the database round trip.
    if not query or not query.strip():
        return []
    # ``limit(0)`` means "no limit" in MongoDB; never pull the whole collection.
    if max_judgements < 1:
        return []

    relevance = {"$meta": "textScore"}
    cursor = (
        judgements_collection.find({"$text": {"$search": query}}, {"score": relevance})
        # $text matches are unordered by default; sort so the limit keeps the best hits.
        .sort([("score", relevance)])
        .limit(max_judgements)
    )
    return list(cursor)
|
||
|
||
with st.form(key="search_form"): | ||
text = st.text_area("What you are looking for in the judgements?") | ||
max_judgements = st.slider("Max judgements to show", min_value=1, max_value=20, value=5) | ||
submit_button = st.form_submit_button(label="Search") | ||
|
||
if submit_button: | ||
with st.spinner("Searching..."): | ||
items = search_data(text, max_judgements) | ||
|
||
st.header("Judgements - Results") | ||
for item in items: | ||
st.header(item["signature"]) | ||
st.subheader(item["publicationDate"]) | ||
st.write(item["text"]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,86 +1,86 @@ | ||
import io | ||
|
||
import pandas as pd | ||
import streamlit as st | ||
|
||
from juddges.prompts.information_extraction import EXAMPLE_SCHEMA | ||
from juddges.settings import SAMPLE_DATA_PATH | ||
|
||
TITLE = "Analyse Judgements" | ||
|
||
st.set_page_config(page_title=TITLE, page_icon="⚖️", layout="wide") | ||
|
||
st.title(TITLE) | ||
|
||
|
||
@st.cache_data
def load_data() -> pd.DataFrame:
    """Load the sampled judgements with their extracted information from CSV.

    Cached with ``st.cache_data`` rather than ``st.cache_resource``: the result
    is plain data, and ``cache_data`` hands each rerun its own copy instead of
    one mutable DataFrame shared by every session.

    Returns:
        The contents of the sample CSV under ``SAMPLE_DATA_PATH``.
    """
    csv_path = SAMPLE_DATA_PATH / "judgements-100-sample-with-retrieved-informations.csv"
    return pd.read_csv(csv_path)
|
||
|
||
df = load_data() | ||
extracted_keys = [line.split(":")[0] for line in EXAMPLE_SCHEMA.split("\n") if len(line) > 3] + [ | ||
"signature", | ||
"excerpt", | ||
"text", | ||
"judges", | ||
"references", | ||
] | ||
|
||
st.info( | ||
"We sampled 100 random judgements from the dataset and extracted information from them. Below is the extracted information and the schema (questions) used to extract it." | ||
) | ||
|
||
st.text_area( | ||
"Example schema for extracted informations: ", value=EXAMPLE_SCHEMA, height=300, disabled=True | ||
) | ||
|
||
st.header("Extracted Information - tabular format") | ||
st.write(df[extracted_keys]) | ||
|
||
|
||
output = io.BytesIO() | ||
with pd.ExcelWriter(output, engine="xlsxwriter") as writer: | ||
df.to_excel(writer, sheet_name="Sheet1", index=False) | ||
output.seek(0) | ||
st.download_button( | ||
label="Download data as Excel", | ||
data=output, | ||
file_name="judgements.xlsx", | ||
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", | ||
) | ||
|
||
st.header("Analyse Extracted Information") | ||
|
||
st.subheader("How many judgements we analyzed?") | ||
|
||
st.write(f"Number of judgements: {len(df)}") | ||
|
||
st.subheader("What courts judgement do we analyse") | ||
|
||
st.write(df.groupby("court")["_id"].count()) | ||
|
||
st.subheader("How many judgements are drug offences?") | ||
|
||
drug_offences = df["drug_offence"].sum() | ||
|
||
st.info(f"Number of drug offences: {drug_offences}") | ||
|
||
st.subheader("How many judgements are child offences?") | ||
|
||
child_offences = df["child_offence"].sum() | ||
|
||
st.info(f"Number of child offences: {child_offences}") | ||
|
||
st.subheader("Show examples of judgements that are child offences") | ||
|
||
# Keep only the rows flagged as child offences; the frame is named for what
# it actually holds.
child_offences_df = df[df["child_offence"]]

st.write("We can check the sentences of them")

for row_id, row in child_offences_df.iterrows():
    st.subheader(row["signature"])
    st.info(row["verdict_summary"])
    # Widget keys are expected to be a str or int. Build a short, stable key
    # from the row's index label instead of passing the whole Series.
    if st.toggle(key=f"show_text_{row_id}", label="Show judgement's text"):
        st.markdown(row["text"])
    st.markdown("---")
import io | ||
|
||
import pandas as pd | ||
import streamlit as st | ||
|
||
from juddges.prompts.information_extraction import EXAMPLE_SCHEMA | ||
from juddges.settings import SAMPLE_DATA_PATH | ||
|
||
TITLE = "Analyse Judgements" | ||
|
||
st.set_page_config(page_title=TITLE, page_icon="⚖️", layout="wide") | ||
|
||
st.title(TITLE) | ||
|
||
|
||
@st.cache_data
def load_data() -> pd.DataFrame:
    """Load the sampled judgements with their extracted information from CSV.

    Cached with ``st.cache_data`` rather than ``st.cache_resource``: the result
    is plain data, and ``cache_data`` hands each rerun its own copy instead of
    one mutable DataFrame shared by every session.

    Returns:
        The contents of the sample CSV under ``SAMPLE_DATA_PATH``.
    """
    csv_path = SAMPLE_DATA_PATH / "judgements-100-sample-with-retrieved-informations.csv"
    return pd.read_csv(csv_path)
|
||
|
||
df = load_data() | ||
extracted_keys = [line.split(":")[0] for line in EXAMPLE_SCHEMA.split("\n") if len(line) > 3] + [ | ||
"signature", | ||
"excerpt", | ||
"text", | ||
"judges", | ||
"references", | ||
] | ||
|
||
st.info( | ||
"We sampled 100 random judgements from the dataset and extracted information from them. Below is the extracted information and the schema (questions) used to extract it." | ||
) | ||
|
||
st.text_area( | ||
"Example schema for extracted informations: ", value=EXAMPLE_SCHEMA, height=300, disabled=True | ||
) | ||
|
||
st.header("Extracted Information - tabular format") | ||
st.write(df[extracted_keys]) | ||
|
||
|
||
output = io.BytesIO() | ||
with pd.ExcelWriter(output, engine="xlsxwriter") as writer: | ||
df.to_excel(writer, sheet_name="Sheet1", index=False) | ||
output.seek(0) | ||
st.download_button( | ||
label="Download data as Excel", | ||
data=output, | ||
file_name="judgements.xlsx", | ||
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", | ||
) | ||
|
||
st.header("Analyse Extracted Information") | ||
|
||
st.subheader("How many judgements we analyzed?") | ||
|
||
st.write(f"Number of judgements: {len(df)}") | ||
|
||
st.subheader("What courts judgement do we analyse") | ||
|
||
st.write(df.groupby("court")["_id"].count()) | ||
|
||
st.subheader("How many judgements are drug offences?") | ||
|
||
drug_offences = df["drug_offence"].sum() | ||
|
||
st.info(f"Number of drug offences: {drug_offences}") | ||
|
||
st.subheader("How many judgements are child offences?") | ||
|
||
child_offences = df["child_offence"].sum() | ||
|
||
st.info(f"Number of child offences: {child_offences}") | ||
|
||
st.subheader("Show examples of judgements that are child offences") | ||
|
||
# Keep only the rows flagged as child offences; the frame is named for what
# it actually holds.
child_offences_df = df[df["child_offence"]]

st.write("We can check the sentences of them")

for row_id, row in child_offences_df.iterrows():
    st.subheader(row["signature"])
    st.info(row["verdict_summary"])
    # Widget keys are expected to be a str or int. Build a short, stable key
    # from the row's index label instead of passing the whole Series.
    if st.toggle(key=f"show_text_{row_id}", label="Show judgement's text"):
        st.markdown(row["text"])
    st.markdown("---")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,4 +9,4 @@ | |
"recorder": 0.9931748509407043, | ||
"signature": 0.9937450289726257 | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,4 +9,4 @@ | |
"recorder": 0.7640316486358643, | ||
"signature": 0.7549777626991272 | ||
} | ||
} | ||
} |
Oops, something went wrong.