From 8cabe13de84ee394939b6053ad45b6cffbf6be07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Augustyniak?= Date: Tue, 9 Apr 2024 10:24:34 +0000 Subject: [PATCH] text analysis --- .../01_Analyze_Polish_Judgements_Texts.ipynb | 215 ++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 nbs/Data/01_Analyze_Polish_Judgements_Texts.ipynb diff --git a/nbs/Data/01_Analyze_Polish_Judgements_Texts.ipynb b/nbs/Data/01_Analyze_Polish_Judgements_Texts.ipynb new file mode 100644 index 0000000..fa8e891 --- /dev/null +++ b/nbs/Data/01_Analyze_Polish_Judgements_Texts.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9e365555", + "metadata": {}, + "source": [ + "# Analyze Text of Polish Judgements\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b666da3-f393-4d88-8036-e818937d2305", + "metadata": {}, + "outputs": [], + "source": [ + "# | eval: false\n", + "from datasets import load_from_disk\n", + "import string\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from juddges.settings import PL_JUDGEMENTS_PATH_TEXTS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1f37c21-de73-48ee-8cc3-8f4f2d4ce735", + "metadata": {}, + "outputs": [], + "source": [ + "# | eval: false\n", + "ds = load_from_disk(dataset_path=PL_JUDGEMENTS_PATH_TEXTS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c49a038b-3bd5-4124-89c2-a019c364fd22", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "33bd783c6a53402c91a8ee3cd8fb122f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map (num_proc=40): 0%| | 0/408423 [00:00