From 93278373159cfdbe801c5b6b4da41f6a2d217d70 Mon Sep 17 00:00:00 2001 From: urigott Date: Thu, 5 Oct 2023 10:57:32 +0300 Subject: [PATCH] remove old files --- docs/Makefile | 19 - docs/changelog.md | 2 - docs/conduct.md | 2 - docs/conf.py | 37 -- docs/contributing.md | 2 - docs/example.ipynb | 428 ------------------ docs/index.md | 13 - docs/make.bat | 36 -- docs/requirements.txt | 3 - source/conf.py | 26 -- source/index.rst | 20 - src/end_to_end_test.py | 32 -- src/rtichoke/__init__.py | 27 -- src/rtichoke/calibration/__init__.py | 3 - src/rtichoke/calibration/calibration.py | 286 ------------ src/rtichoke/discrimination/__init__.py | 3 - src/rtichoke/discrimination/gains.py | 71 --- src/rtichoke/discrimination/lift.py | 71 --- .../discrimination/precision_recall.py | 71 --- src/rtichoke/discrimination/roc.py | 72 --- src/rtichoke/helpers/__init__.py | 3 - src/rtichoke/helpers/exported_functions.py | 237 ---------- src/rtichoke/helpers/helper_functions.py | 105 ----- .../helpers/plotly_helper_functions.py | 114 ----- .../send_post_request_to_r_rtichoke.py | 195 -------- src/rtichoke/helpers/validations.py | 133 ------ src/rtichoke/performance_data/__init__.py | 3 - .../performance_data/performance_data.py | 41 -- .../prepare_calibration_data.py | 109 ----- .../prepare_performance_data.py | 168 ------- src/rtichoke/plot/__init__.py | 3 - src/rtichoke/plot/bokeh/__init__.py | 3 - .../plot/bokeh/create_bokeh_plot_dict.py | 111 ----- src/rtichoke/plot/bokeh/plot_bokeh.py | 251 ---------- src/rtichoke/plot/create_generic_plot_dict.py | 79 ---- src/rtichoke/plot/plotting.py | 27 -- src/rtichoke/rtichoke.py | 0 src/rtichoke/summary_report/__init__.py | 3 - src/rtichoke/summary_report/summary_report.py | 21 - src/rtichoke/utility/__init__.py | 3 - src/rtichoke/utility/decision.py | 98 ---- src/tests/__init__.py | 4 - src/tests/test_calculate_performance_data.py | 95 ---- src/tests/test_vericiation_tests.py | 155 ------- tests/test_rtichoke.py | 10 - 45 files changed, 3195 deletions(-) delete mode 100644 docs/Makefile delete mode 100644 docs/changelog.md delete mode 100644 docs/conduct.md delete mode 100644 docs/conf.py delete mode 100644 docs/contributing.md delete mode 100644 docs/example.ipynb delete mode 100644 docs/index.md delete mode 100644 docs/make.bat delete mode 100644 docs/requirements.txt delete mode 100644 source/conf.py delete mode 100644 source/index.rst delete mode 100644 src/end_to_end_test.py delete mode 100644 src/rtichoke/__init__.py delete mode 100644 src/rtichoke/calibration/__init__.py delete mode 100644 src/rtichoke/calibration/calibration.py delete mode 100644 src/rtichoke/discrimination/__init__.py delete mode 100644 src/rtichoke/discrimination/gains.py delete mode 100644 src/rtichoke/discrimination/lift.py delete mode 100644 src/rtichoke/discrimination/precision_recall.py delete mode 100644 src/rtichoke/discrimination/roc.py delete mode 100644 src/rtichoke/helpers/__init__.py delete mode 100644 src/rtichoke/helpers/exported_functions.py delete mode 100644 src/rtichoke/helpers/helper_functions.py delete mode 100644 src/rtichoke/helpers/plotly_helper_functions.py delete mode 100644 src/rtichoke/helpers/send_post_request_to_r_rtichoke.py delete mode 100644 src/rtichoke/helpers/validations.py delete mode 100644 src/rtichoke/performance_data/__init__.py delete mode 100644 src/rtichoke/performance_data/performance_data.py delete mode 100644 src/rtichoke/performance_data/prepare_calibration_data.py delete mode 100644 src/rtichoke/performance_data/prepare_performance_data.py delete mode 100644 src/rtichoke/plot/__init__.py delete mode 100644 src/rtichoke/plot/bokeh/__init__.py delete mode 100644 src/rtichoke/plot/bokeh/create_bokeh_plot_dict.py delete mode 100644 src/rtichoke/plot/bokeh/plot_bokeh.py delete mode 100644 src/rtichoke/plot/create_generic_plot_dict.py delete mode 100644 src/rtichoke/plot/plotting.py delete mode 100644 src/rtichoke/rtichoke.py delete mode 100644 src/rtichoke/summary_report/__init__.py delete mode 100644 src/rtichoke/summary_report/summary_report.py delete mode 100644 src/rtichoke/utility/__init__.py delete mode 100644 src/rtichoke/utility/decision.py delete mode 100644 src/tests/__init__.py delete mode 100644 src/tests/test_calculate_performance_data.py delete mode 100644 src/tests/test_vericiation_tests.py delete mode 100644 tests/test_rtichoke.py diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 5172566..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# Minimal makefile for Sphinx documentation - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = python -msphinx -SPHINXPROJ = rtichoke -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/changelog.md b/docs/changelog.md deleted file mode 100644 index 8261b35..0000000 --- a/docs/changelog.md +++ /dev/null @@ -1,2 +0,0 @@ -```{include} ../CHANGELOG.md -``` \ No newline at end of file diff --git a/docs/conduct.md b/docs/conduct.md deleted file mode 100644 index 0568705..0000000 --- a/docs/conduct.md +++ /dev/null @@ -1,2 +0,0 @@ -```{include} ../CONDUCT.md -``` \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index 6d31e8d..0000000 --- a/docs/conf.py +++ /dev/null @@ -1,37 +0,0 @@ -"""docs file""" -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Project information ----------------------------------------------------- - -project = "rtichoke" -copyrights = "2023, Uriah Finkel" -author = "Uriah Finkel" - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "myst_nb", - "autoapi.extension", - "sphinx.ext.napoleon", - "sphinx.ext.viewcode", -] -autoapi_dirs = ["../src"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" diff --git a/docs/contributing.md b/docs/contributing.md deleted file mode 100644 index 435d357..0000000 --- a/docs/contributing.md +++ /dev/null @@ -1,2 +0,0 @@ -```{include} ../CONTRIBUTING.md -``` \ No newline at end of file diff --git a/docs/example.ipynb b/docs/example.ipynb deleted file mode 100644 index a93f58f..0000000 --- a/docs/example.ipynb +++ /dev/null @@ -1,428 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Example usage\n", - "\n", - "To use `rtichoke` in a project:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.1.0\n" - ] - } - ], - "source": [ - "import rtichoke\n", - "\n", - "print(rtichoke.__version__)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import json\n", - "import requests\n", - "from sklearn.datasets import make_classification\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.linear_model import LogisticRegression\n", - "import numpy as np\n", - "\n", - "lr = LogisticRegression()\n", - "x = np.arange(10).reshape(-1, 1)\n", - "y = np.array([0, 1, 0, 0, 1, 1, 1, 0, 0, 1])\n", - "\n", - "x_test = np.arange(7).reshape(-1, 1)\n", - "y_test = np.array([1, 0, 1, 0, 1, 0, 0])\n", - "\n", - "model = LogisticRegression(solver=\"liblinear\", random_state=0)\n", - "lasso = LogisticRegression(solver=\"liblinear\", penalty=\"l1\", random_state=0)\n", - "\n", - "model.fit(x, y)\n", - "lasso.fit(x_test, y_test)\n", - "\n", - "probs_dict_for_examples = {\n", - " \"One Model\": {\"Logistic Regression\": model.predict_proba(x)[:, 1].tolist()},\n", - " \"Multiple Models\": {\n", - " \"Logistic Regression\": model.predict_proba(x)[:, 1].tolist(),\n", - " \"Lasso\": lasso.predict_proba(x)[:, 1].tolist(),\n", - " },\n", - " \"Multiple Populations\": {\n", - " \"Train\": model.predict_proba(x)[:, 1].tolist(),\n", - " \"Test\": model.predict_proba(x_test)[:, 1].tolist(),\n", - " },\n", - "}\n", - "\n", - "reals_dict_for_examples = {\n", - " \"One Model\": {\"Logistic Regression\": y.tolist()},\n", - " \"Multiple Models\": {\"Reals\": y.tolist()},\n", - " \"Multiple Populations\": {\"Train\": y.tolist(), \"Test\": y_test.tolist()},\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dict_keys(['probability_threshold', 'TP', 'TN', 'FN', 'FP', 'sensitivity', 'FPR', 'specificity', 'PPV', 'NPV', 'lift', 'predicted_positives', 'NB', 'ppcr'])\n", - "dict_keys(['probability_threshold', 'ppcr', 'TP', 'TN', 'FN', 'FP', 'sensitivity', 'FPR', 'specificity', 'PPV', 'NPV', 'lift', 'predicted_positives'])\n", - "dict_keys(['model', 'probability_threshold', 'TP', 'TN', 'FN', 'FP', 'sensitivity', 'FPR', 'specificity', 'PPV', 'NPV', 'lift', 'predicted_positives', 'NB', 'ppcr'])\n", - "dict_keys(['model', 'probability_threshold', 'ppcr', 'TP', 'TN', 'FN', 'FP', 'sensitivity', 'FPR', 'specificity', 'PPV', 'NPV', 'lift', 'predicted_positives'])\n", - "dict_keys(['population', 'probability_threshold', 'TP', 'TN', 'FN', 'FP', 'sensitivity', 'FPR', 'specificity', 'PPV', 'NPV', 'lift', 'predicted_positives', 'NB', 'ppcr'])\n", - "dict_keys(['population', 'probability_threshold', 'ppcr', 'TP', 'TN', 'FN', 'FP', 'sensitivity', 'FPR', 'specificity', 'PPV', 'NPV', 'lift', 'predicted_positives'])\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
probability_thresholdTPTNFNFPsensitivityFPRspecificityPPVNPVliftpredicted_positivesNBppcr
00.0050051.01.00.00.5NaN1.0100.50001.0
10.0150051.01.00.00.5NaN1.0100.49491.0
20.0250051.01.00.00.5NaN1.0100.48981.0
30.0350051.01.00.00.5NaN1.0100.48451.0
40.0450051.01.00.00.5NaN1.0100.47921.0
.............................................
960.9605500.00.01.0NaN0.5NaN00.00000.0
970.9705500.00.01.0NaN0.5NaN00.00000.0
980.9805500.00.01.0NaN0.5NaN00.00000.0
990.9905500.00.01.0NaN0.5NaN00.00000.0
1001.0005500.00.01.0NaN0.5NaN0NaN0.0
\n", - "

101 rows × 14 columns

\n", - "
" - ], - "text/plain": [ - " probability_threshold TP TN FN FP sensitivity FPR specificity \\\n", - "0 0.00 5 0 0 5 1.0 1.0 0.0 \n", - "1 0.01 5 0 0 5 1.0 1.0 0.0 \n", - "2 0.02 5 0 0 5 1.0 1.0 0.0 \n", - "3 0.03 5 0 0 5 1.0 1.0 0.0 \n", - "4 0.04 5 0 0 5 1.0 1.0 0.0 \n", - ".. ... .. .. .. .. ... ... ... \n", - "96 0.96 0 5 5 0 0.0 0.0 1.0 \n", - "97 0.97 0 5 5 0 0.0 0.0 1.0 \n", - "98 0.98 0 5 5 0 0.0 0.0 1.0 \n", - "99 0.99 0 5 5 0 0.0 0.0 1.0 \n", - "100 1.00 0 5 5 0 0.0 0.0 1.0 \n", - "\n", - " PPV NPV lift predicted_positives NB ppcr \n", - "0 0.5 NaN 1.0 10 0.5000 1.0 \n", - "1 0.5 NaN 1.0 10 0.4949 1.0 \n", - "2 0.5 NaN 1.0 10 0.4898 1.0 \n", - "3 0.5 NaN 1.0 10 0.4845 1.0 \n", - "4 0.5 NaN 1.0 10 0.4792 1.0 \n", - ".. ... ... ... ... ... ... \n", - "96 NaN 0.5 NaN 0 0.0000 0.0 \n", - "97 NaN 0.5 NaN 0 0.0000 0.0 \n", - "98 NaN 0.5 NaN 0 0.0000 0.0 \n", - "99 NaN 0.5 NaN 0 0.0000 0.0 \n", - "100 NaN 0.5 NaN 0 NaN 0.0 \n", - "\n", - "[101 rows x 14 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "performance_datas = [\n", - " rtichoke.prepare_performance_data(\n", - " probs=probs_dict_for_examples[x],\n", - " reals=reals_dict_for_examples[x],\n", - " stratified_by=stratified_by,\n", - " url_api=\"http://127.0.0.1:7644/\",\n", - " )\n", - " for x in probs_dict_for_examples.keys()\n", - " for stratified_by in [\"probability_threshold\", \"ppcr\"]\n", - "]\n", - "\n", - "performance_datas[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "roc_curves = [\n", - " rtichoke.create_roc_curve(\n", - " probs=probs_dict_for_examples[x],\n", - " reals=reals_dict_for_examples[x],\n", - " size=600,\n", - " stratified_by=stratified_by,\n", - " url_api=\"http://127.0.0.1:7644/\",\n", - " )\n", - " for x in probs_dict_for_examples.keys()\n", - " for stratified_by in [\"probability_threshold\", \"ppcr\"]\n", - "]\n", - "\n", - "# roc_curves[0].show(config={'displayModeBar': False})\n", - "# roc_curves[1].show(config={'displayModeBar': False})\n", - "# roc_curves[2].show(config={'displayModeBar': False})\n", - "# roc_curves[3].show(config={'displayModeBar': False})\n", - "roc_curves[4].show(config={'displayModeBar': False})\n", - "# roc_curves[5].show(config={'displayModeBar': False})" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - }, - "vscode": { - "interpreter": { - "hash": "42ac3c661d42982b588319d2da94614317e26c8882258a0ff8ba4739749cacec" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 6fcdc0e..0000000 --- a/docs/index.md +++ /dev/null @@ -1,13 +0,0 @@ -```{include} ../README.md -``` - -```{toctree} -:maxdepth: 1 -:hidden: - -example.ipynb -changelog.md -contributing.md -conduct.md -autoapi/index -``` \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 9a59b5d..0000000 --- a/docs/make.bat +++ /dev/null @@ -1,36 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=python -msphinx -) -set SOURCEDIR=. -set BUILDDIR=_build -set SPHINXPROJ=rtichoke - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The Sphinx module was not found. Make sure you have Sphinx installed, - echo.then set the SPHINXBUILD environment variable to point to the full - echo.path of the 'sphinx-build' executable. Alternatively you may add the - echo.Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% - -:end -popd diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 7b5bd2a..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -myst-nb -sphinx-autoapi -sphinx-rtd-theme \ No newline at end of file diff --git a/source/conf.py b/source/conf.py deleted file mode 100644 index 5e17869..0000000 --- a/source/conf.py +++ /dev/null @@ -1,26 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# For the full list of built-in configuration values, see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - -project = "Rtichoke_python" -copyright = "2023, Uri Gottlieb" -author = "Uriah Finkel | Uri Gottlieb" - -# -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration - -extensions = [] - -templates_path = ["_templates"] -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output - -html_theme = "alabaster" -html_static_path = ["_static"] diff --git a/source/index.rst b/source/index.rst deleted file mode 100644 index 6fddbc1..0000000 --- a/source/index.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. Rtichoke_python documentation master file, created by - sphinx-quickstart on Thu Oct 5 10:42:07 2023. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to Rtichoke_python's documentation! -=========================================== - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/src/end_to_end_test.py b/src/end_to_end_test.py deleted file mode 100644 index 1787619..0000000 --- a/src/end_to_end_test.py +++ /dev/null @@ -1,32 +0,0 @@ -"""end-to-end testing + example""" - - -import numpy as np -import pandas as pd -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.datasets import fetch_california_housing - -from rtichoke import Rtichoke - -# create fake data - -data = fetch_california_housing() -X = pd.DataFrame(data["data"], columns=data["feature_names"]) -y = (data["target"] > 3).astype(int) - -X_train, X_test, y_train, y_test = train_test_split(X, y) - -lr = LogisticRegression().fit(X_train, y_train) - -probs = { - "population1": lr.predict_proba( - X_test * np.random.normal(loc=1, size=X_test.shape) - )[:, 1], - "population2": lr.predict_proba(X_train)[:, 1], -} -reals = {"population1": y_test, "population2": y_train} - -r = Rtichoke(probs, reals, by=0.01) - -r.plot("calibration", filename="temp.html") diff --git a/src/rtichoke/__init__.py b/src/rtichoke/__init__.py deleted file mode 100644 index dc127fe..0000000 --- a/src/rtichoke/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -"""rtichoke is a package for interactive vizualization of performance metrics -""" - -from importlib.metadata import version - -__version__ = version("rtichoke") - -from rtichoke.discrimination.roc import create_roc_curve -from rtichoke.discrimination.roc import plot_roc_curve - -from rtichoke.discrimination.lift import create_lift_curve -from rtichoke.discrimination.lift import plot_lift_curve - -from rtichoke.discrimination.precision_recall import create_precision_recall_curve -from rtichoke.discrimination.precision_recall import plot_precision_recall_curve - -from rtichoke.discrimination.gains import create_gains_curve -from rtichoke.discrimination.gains import plot_gains_curve - -from rtichoke.calibration.calibration import create_calibration_curve - -from rtichoke.utility.decision import create_decision_curve -from rtichoke.utility.decision import plot_decision_curve - -from rtichoke.performance_data.performance_data import prepare_performance_data - -from rtichoke.summary_report.summary_report import create_summary_report diff --git a/src/rtichoke/calibration/__init__.py b/src/rtichoke/calibration/__init__.py deleted file mode 100644 index 4267999..0000000 --- a/src/rtichoke/calibration/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Subpackage for Calibration -""" diff --git a/src/rtichoke/calibration/calibration.py b/src/rtichoke/calibration/calibration.py deleted file mode 100644 index 0c1f81d..0000000 --- a/src/rtichoke/calibration/calibration.py +++ /dev/null @@ -1,286 +0,0 @@ -""" -A module for Calibration Curves -""" - -from typing import Any, Dict, List, Optional -import pandas as pd -import plotly.graph_objects as go -from plotly.subplots import make_subplots -from plotly.graph_objs._figure import Figure -from rtichoke.helpers.send_post_request_to_r_rtichoke import send_requests_to_rtichoke_r - - -def create_calibration_curve( - probs: Dict[str, List[float]], - reals: Dict[str, List[int]], - calibration_type: str = "discrete", - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Creates Calibration Curve - - Args: - probs (Dict[str, List[float]]): _description_ - reals (Dict[str, List[int]]): _description_ - calibration_type (str, optional): _description_. Defaults to "discrete". - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - if color_values is None: - color_values = [ - "#1b9e77", - "#d95f02", - "#7570b3", - "#e7298a", - "#07004D", - "#E6AB02", - "#FE5F55", - "#54494B", - "#006E90", - "#BC96E6", - "#52050A", - "#1F271B", - "#BE7C4D", - "#63768D", - "#08A045", - "#320A28", - "#82FF9E", - "#2176FF", - "#D1603D", - "#585123", - ] - - rtichoke_response = send_requests_to_rtichoke_r( - dictionary_to_send={ - "probs": probs, - "reals": reals, - "size": size, - "color_values ": color_values, - }, - url_api=url_api, - endpoint="create_calibration_curve_list", - ) - - calibration_curve_list = rtichoke_response.json() - - calibration_curve_list["deciles_dat"] = pd.DataFrame.from_dict( - calibration_curve_list["deciles_dat"] - ) - calibration_curve_list["smooth_dat"] = pd.DataFrame.from_dict( - calibration_curve_list["smooth_dat"] - ) - calibration_curve_list["reference_data"] = pd.DataFrame.from_dict( - calibration_curve_list["reference_data"] - ) - calibration_curve_list["histogram_for_calibration"] = pd.DataFrame.from_dict( - calibration_curve_list["histogram_for_calibration"] - ) - - calibration_curve = create_plotly_curve_from_calibration_curve_list( - calibration_curve_list=calibration_curve_list, calibration_type=calibration_type - ) - - return calibration_curve - - -def create_plotly_curve_from_calibration_curve_list( - calibration_curve_list: Dict[str, Any], calibration_type: str = "discrete" -) -> Figure: - """Create plotly curve from calibration curve list - - Args: - calibration_curve_list (Dict[str, Any]): _description_ - calibration_type (str, optional): _description_. Defaults to "discrete". - - Returns: - Figure: _description_ - """ - calibration_curve = make_subplots( - rows=2, cols=1, shared_xaxes=True, x_title="Predicted", row_heights=[0.8, 0.2] - ) - - calibration_curve.update_layout( - { - "xaxis": {"showgrid": False}, - "yaxis": {"showgrid": False}, - "barmode": "overlay", - "plot_bgcolor": "rgba(0, 0, 0, 0)", - "legend": { - "orientation": "h", - "xanchor": "center", - "yanchor": "top", - "x": 0.5, - "y": 1.3, - "bgcolor": "rgba(0, 0, 0, 0)", - }, - "showlegend": calibration_curve_list["performance_type"][0] != "one model", - } - ) - - calibration_curve.add_trace( - go.Scatter( - x=calibration_curve_list["reference_data"]["x"].values.tolist(), - y=calibration_curve_list["reference_data"]["y"].values.tolist(), - hovertext=calibration_curve_list["reference_data"]["text"].values.tolist(), - name="Perfectly Calibrated", - legendgroup="Perfectly Calibrated", - hoverinfo="text", - line={ - "width": 2, - "dash": "dot", - "color": calibration_curve_list["group_colors_vec"]["reference_line"][ - 0 - ], - }, - showlegend=False, - ), - row=1, - col=1, - ) - - if calibration_type == "discrete": - for reference_group in list(calibration_curve_list["group_colors_vec"].keys()): - if any( - calibration_curve_list["deciles_dat"]["reference_group"] - == reference_group - ): - calibration_curve.add_trace( - go.Scatter( - x=calibration_curve_list["deciles_dat"]["x"][ - calibration_curve_list["deciles_dat"]["reference_group"] - == reference_group - ].values.tolist(), - y=calibration_curve_list["deciles_dat"]["y"][ - calibration_curve_list["deciles_dat"]["reference_group"] - == reference_group - ].values.tolist(), - hovertext=calibration_curve_list["deciles_dat"]["text"][ - calibration_curve_list["deciles_dat"]["reference_group"] - == reference_group - ].values.tolist(), - name=reference_group, - legendgroup=reference_group, - hoverinfo="text", - mode="lines+markers", - marker={ - "size": 10, - "color": calibration_curve_list["group_colors_vec"][ - reference_group - ][0], - }, - ), - row=1, - col=1, - ) - - if calibration_type == "smooth": - for reference_group in list(calibration_curve_list["group_colors_vec"].keys()): - if any( - calibration_curve_list["smooth_dat"]["reference_group"] - == reference_group - ): - calibration_curve.add_trace( - go.Scatter( - x=calibration_curve_list["smooth_dat"]["x"][ - calibration_curve_list["smooth_dat"]["reference_group"] - == reference_group - ].values.tolist(), - y=calibration_curve_list["smooth_dat"]["y"][ - calibration_curve_list["smooth_dat"]["reference_group"] - == reference_group - ].values.tolist(), - hovertext=calibration_curve_list["smooth_dat"]["text"][ - calibration_curve_list["smooth_dat"]["reference_group"] - == reference_group - ].values.tolist(), - name=reference_group, - legendgroup=reference_group, - hoverinfo="text", - mode="lines", - marker={ - "size": 10, - "color": calibration_curve_list["group_colors_vec"][ - reference_group - ][0], - }, - ), - row=1, - col=1, - ) - - for reference_group in list(calibration_curve_list["group_colors_vec"].keys()): - if any( - calibration_curve_list["histogram_for_calibration"]["reference_group"] - == reference_group - ): - calibration_curve.add_trace( - go.Bar( - x=calibration_curve_list["histogram_for_calibration"]["mids"][ - calibration_curve_list["histogram_for_calibration"][ - "reference_group" - ] - == reference_group - ].values.tolist(), - y=calibration_curve_list["histogram_for_calibration"]["counts"][ - calibration_curve_list["histogram_for_calibration"][ - "reference_group" - ] - == reference_group - ].values.tolist(), - hovertext=calibration_curve_list["histogram_for_calibration"][ - "text" - ][ - calibration_curve_list["histogram_for_calibration"][ - "reference_group" - ] - == reference_group - ].values.tolist(), - name=reference_group, - width=0.01, - legendgroup=reference_group, - hoverinfo="text", - marker_color=calibration_curve_list["group_colors_vec"][ - reference_group - ][0], - showlegend=False, - opacity=calibration_curve_list["histogram_opacity"][0], - ), - row=2, - col=1, - ) - - print(calibration_curve_list["axes_ranges"]["xaxis"]) - - calibration_curve.update_xaxes( - zeroline=True, - range=calibration_curve_list["axes_ranges"]["xaxis"], - zerolinewidth=1, - zerolinecolor="black", - fixedrange=False, - ) - calibration_curve.update_yaxes( - zeroline=True, - range=calibration_curve_list["axes_ranges"]["yaxis"], - zerolinewidth=1, - zerolinecolor="black", - fixedrange=False, - row=1, - col=1, - ) - calibration_curve.update_yaxes(title="Observed", row=1, col=1) - - print("size") - print(calibration_curve_list["size"]) - print(calibration_curve_list["size"][0]) - - calibration_curve.update_layout( - width=calibration_curve_list["size"][0][0], - height=calibration_curve_list["size"][0][0], - ) - - return calibration_curve diff --git a/src/rtichoke/discrimination/__init__.py b/src/rtichoke/discrimination/__init__.py deleted file mode 100644 index 515d8ab..0000000 --- a/src/rtichoke/discrimination/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Subpackage for Discrimination -""" diff --git a/src/rtichoke/discrimination/gains.py b/src/rtichoke/discrimination/gains.py deleted file mode 100644 index 4bb4026..0000000 --- a/src/rtichoke/discrimination/gains.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -A module for Gains Curves -""" -from typing import Dict, List, Optional -from pandas.core.frame import DataFrame -from plotly.graph_objs._figure import Figure -from rtichoke.helpers.send_post_request_to_r_rtichoke import create_rtichoke_curve -from rtichoke.helpers.send_post_request_to_r_rtichoke import plot_rtichoke_curve - - -def create_gains_curve( - probs: Dict[str, List[float]], - reals: Dict[str, List[int]], - by: float = 0.01, - stratified_by: str = "probability_threshold", - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Create Gains Curve - - Args: - probs (Dict[str, List[float]]): _description_ - reals (Dict[str, List[int]]): _description_ - by (float, optional): _description_. Defaults to 0.01. - stratified_by (str, optional): _description_. Defaults to "probability_threshold". - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - fig = create_rtichoke_curve( - probs, - reals, - by=by, - stratified_by=stratified_by, - size=size, - color_values=color_values, - url_api=url_api, - curve="gains", - ) - return fig - - -def plot_gains_curve( - performance_data: DataFrame, - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Plot Gains Curve - - Args: - performance_data (DataFrame): _description_ - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - fig = plot_rtichoke_curve( - performance_data, - size=size, - color_values=color_values, - url_api=url_api, - curve="gains", - ) - return fig diff --git a/src/rtichoke/discrimination/lift.py b/src/rtichoke/discrimination/lift.py deleted file mode 100644 index 490735f..0000000 --- a/src/rtichoke/discrimination/lift.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -A module for Lift Curves -""" -from typing import Dict, List, Optional -from plotly.graph_objs._figure import Figure -from pandas.core.frame import DataFrame -from rtichoke.helpers.send_post_request_to_r_rtichoke import create_rtichoke_curve -from rtichoke.helpers.send_post_request_to_r_rtichoke import plot_rtichoke_curve - - -def create_lift_curve( - probs: Dict[str, List[float]], - reals: Dict[str, List[int]], - by: float = 0.01, - stratified_by: str = "probability_threshold", - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Create Lift Curve - - Args: - probs (Dict[str, List[float]]): _description_ - reals (Dict[str, List[int]]): _description_ - by (float, optional): _description_. Defaults to 0.01. - stratified_by (str, optional): _description_. Defaults to "probability_threshold". - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - fig = create_rtichoke_curve( - probs, - reals, - by=by, - stratified_by=stratified_by, - size=size, - color_values=color_values, - url_api=url_api, - curve="lift", - ) - return fig - - -def plot_lift_curve( - performance_data: DataFrame, - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Plot Lift Curve - - Args: - performance_data (DataFrame): _description_ - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - fig = plot_rtichoke_curve( - performance_data, - size=size, - color_values=color_values, - url_api=url_api, - curve="lift", - ) - return fig diff --git a/src/rtichoke/discrimination/precision_recall.py b/src/rtichoke/discrimination/precision_recall.py deleted file mode 100644 index 13d1a81..0000000 --- a/src/rtichoke/discrimination/precision_recall.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -A module for Precision Recall Curves -""" -from typing import Dict, List, Optional -from plotly.graph_objs._figure import Figure -from pandas.core.frame import DataFrame -from rtichoke.helpers.send_post_request_to_r_rtichoke import create_rtichoke_curve -from rtichoke.helpers.send_post_request_to_r_rtichoke import plot_rtichoke_curve - - -def create_precision_recall_curve( - probs: Dict[str, List[float]], - reals: Dict[str, List[int]], - by: float = 0.01, - stratified_by: str = "probability_threshold", - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Create Precision Recall Curve - - Args: - probs (Dict[str, List[float]]): _description_ - reals (Dict[str, List[int]]): _description_ - by (float, optional): _description_. Defaults to 0.01. - stratified_by (str, optional): _description_. Defaults to "probability_threshold". - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - fig = create_rtichoke_curve( - probs, - reals, - by=by, - stratified_by=stratified_by, - size=size, - color_values=color_values, - url_api=url_api, - curve="precision recall", - ) - return fig - - -def plot_precision_recall_curve( - performance_data: DataFrame, - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Plot Precision Recall Curve - - Args: - performance_data (DataFrame): _description_ - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - fig = plot_rtichoke_curve( - performance_data, - size=size, - color_values=color_values, - url_api=url_api, - curve="precision recall", - ) - return fig diff --git a/src/rtichoke/discrimination/roc.py b/src/rtichoke/discrimination/roc.py deleted file mode 100644 index 3825b8c..0000000 --- a/src/rtichoke/discrimination/roc.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -A module for ROC Curves -""" - -from typing import Dict, List, Optional -from plotly.graph_objs._figure import Figure -from pandas.core.frame import DataFrame -from rtichoke.helpers.send_post_request_to_r_rtichoke import create_rtichoke_curve -from rtichoke.helpers.send_post_request_to_r_rtichoke import plot_rtichoke_curve - - -def create_roc_curve( - probs: Dict[str, List[float]], - reals: Dict[str, List[int]], - by: float = 0.01, - stratified_by: str = "probability_threshold", - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Create ROC Curve - - Args: - probs (Dict[str, List[float]]): _description_ - reals (Dict[str, List[int]]): _description_ - by (float, optional): _description_. Defaults to 0.01. - stratified_by (str, optional): _description_. Defaults to "probability_threshold". - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - fig = create_rtichoke_curve( - probs, - reals, - by=by, - stratified_by=stratified_by, - size=size, - color_values=color_values, - url_api=url_api, - curve="roc", - ) - return fig - - -def plot_roc_curve( - performance_data: DataFrame, - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Plot ROC Curve - - Args: - performance_data (DataFrame): _description_ - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - fig = plot_rtichoke_curve( - performance_data, - size=size, - color_values=color_values, - url_api=url_api, - curve="roc", - ) - return fig diff --git a/src/rtichoke/helpers/__init__.py b/src/rtichoke/helpers/__init__.py deleted file mode 100644 index b0567e7..0000000 --- a/src/rtichoke/helpers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Subpackage for helpers -""" diff --git a/src/rtichoke/helpers/exported_functions.py b/src/rtichoke/helpers/exported_functions.py deleted file mode 100644 index 5afcd9a..0000000 --- a/src/rtichoke/helpers/exported_functions.py +++ /dev/null @@ -1,237 +0,0 @@ -""" -A module for Creating Plotly Curves from rtichoke curve dictionaries -""" - -import plotly.graph_objects as go - -from rtichoke.helpers.plotly_helper_functions import ( - create_non_interactive_curve, - create_interactive_marker, - create_reference_lines_for_plotly, -) - -# TODO: Fix zoom for plotly curves - - -def create_plotly_curve(rtichoke_curve_dict): - """ - - Parameters - ---------- - rtichoke_curve_dict : - - - Returns - ------- - - """ - - # reference_data, - # performance_data_ready_for_curve, - # group_colors_vec, - # axis_ranges - - reference_data_list = [] - non_interactive_curve = [] - interactive_marker = [] - - curve_layout = { - "xaxis": { - "showgrid": False, - }, - "yaxis": {"showgrid": False}, - "plot_bgcolor": "rgba(0, 0, 0, 0)", - "showlegend": True, - "legend": { - "orientation": "h", - "xanchor": "center", - "yanchor": "top", - "x": 0.5, - "y": 1.3, - }, - "height": rtichoke_curve_dict["size"][0][0], - "width": rtichoke_curve_dict["size"][0][0], - "updatemenus": [ - { - "type": "buttons", - "buttons": [ - { - "label": "Play", - "method": "animate", - "visible": False, - "args": [None, {"frame": {"duration": 500, "redraw": False}}], - } - ], - } - ], - } - - for reference_group in list(rtichoke_curve_dict["group_colors_vec"].keys()): - if rtichoke_curve_dict["perf_dat_type"][0] not in [ - "several models", - "several populations", - ]: - interactive_marker_color = "#f6e3be" - else: - interactive_marker_color = rtichoke_curve_dict["group_colors_vec"][ - reference_group - ][0] - if not rtichoke_curve_dict["reference_data"].empty: - if any( - rtichoke_curve_dict["reference_data"]["reference_group"] - == reference_group - ): - reference_data_list.append( - create_reference_lines_for_plotly( - rtichoke_curve_dict["reference_data"][ - rtichoke_curve_dict["reference_data"]["reference_group"] - == reference_group - ], - rtichoke_curve_dict["group_colors_vec"][reference_group][0], - ) - ) - if any( - rtichoke_curve_dict["performance_data_ready_for_curve"]["reference_group"] - == reference_group - ): - non_interactive_curve.append( - create_non_interactive_curve( - rtichoke_curve_dict["performance_data_ready_for_curve"][ - rtichoke_curve_dict["performance_data_ready_for_curve"][ - "reference_group" - ] - == reference_group - ], - rtichoke_curve_dict["group_colors_vec"][reference_group][0], - reference_group, - ) - ) - if any( - rtichoke_curve_dict["performance_data_ready_for_curve"]["reference_group"] - == reference_group - ): - interactive_marker.append( - create_interactive_marker( - rtichoke_curve_dict["performance_data_for_interactive_marker"][ - rtichoke_curve_dict["performance_data_for_interactive_marker"][ - "reference_group" - ] - == reference_group - ], - interactive_marker_color, - 0, - reference_group, - ) - ) - - frames = [] - - sliders_dict = { - "active": 0, - "yanchor": "top", - "xanchor": "left", - "currentvalue": { - "font": {"size": 20}, - "prefix": rtichoke_curve_dict["animation_slider_prefix"][0], - "visible": True, - "xanchor": "left", - }, - "transition": {"duration": 300, "easing": "linear"}, - "pad": {"b": 10, "t": 50}, - "len": 0.9, - "x": 0.1, - "y": 0, - "steps": [], - } - - for k in range( - len( - rtichoke_curve_dict["performance_data_ready_for_curve"][ - "stratified_by" - ].unique() - ) - ): - frame_data = reference_data_list + non_interactive_curve - for reference_group in list(rtichoke_curve_dict["group_colors_vec"].keys()): - if rtichoke_curve_dict["perf_dat_type"][0] not in [ - "several models", - "several populations", - ]: - interactive_marker_color = "#f6e3be" - else: - interactive_marker_color = rtichoke_curve_dict["group_colors_vec"][ - reference_group - ][0] - - if any( - rtichoke_curve_dict["performance_data_ready_for_curve"][ - "reference_group" - ] - == reference_group - ): - frame_data.append( - create_interactive_marker( - rtichoke_curve_dict["performance_data_for_interactive_marker"][ - rtichoke_curve_dict[ - "performance_data_for_interactive_marker" - ]["reference_group"] - == reference_group - ], - interactive_marker_color, - k, - reference_group, - ) - ) - frames.append(go.Frame(data=frame_data, name=str(k))) - slider_step = { - "args": [ - [k], - {"frame": {"duration": 300, "redraw": False}, "mode": "immediate"}, - ], - "label": str( - rtichoke_curve_dict["performance_data_ready_for_curve"][ - "stratified_by" - ].unique()[k] - ), - "method": "animate", - } - sliders_dict["steps"].append(slider_step) - - curve_layout["sliders"] = [sliders_dict] - fig = go.Figure( - data=reference_data_list + non_interactive_curve + interactive_marker, - layout=curve_layout, - frames=frames, - ) - - fig.update_layout( - { - "legend": { - "orientation": "h", - "xanchor": "center", - "yanchor": "top", - "x": 0.5, - "y": 1.3, - "bgcolor": "rgba(0, 0, 0, 0)", - }, - "showlegend": rtichoke_curve_dict["perf_dat_type"][0] != "one model", - } - ) - - fig.update_xaxes( - zeroline=True, - range=rtichoke_curve_dict["axes_ranges"]["xaxis"], - zerolinewidth=1, - zerolinecolor="black", - fixedrange=True, - title={"text": rtichoke_curve_dict["axes_labels"]["xaxis"][0]}, - ) - fig.update_yaxes( - zeroline=True, - range=rtichoke_curve_dict["axes_ranges"]["yaxis"], - zerolinewidth=1, - zerolinecolor="black", - fixedrange=True, - title={"text": rtichoke_curve_dict["axes_labels"]["yaxis"][0]}, - ) - return fig diff --git a/src/rtichoke/helpers/helper_functions.py b/src/rtichoke/helpers/helper_functions.py deleted file mode 100644 index 52c05bf..0000000 --- a/src/rtichoke/helpers/helper_functions.py +++ /dev/null @@ -1,105 +0,0 @@ -"""A module with several helper functions for rtichoke""" - -from datetime import datetime -import numpy as np - - -def tprint(string): - """prints `string` with preceeding timestamp - - Args: - string (str): string to print - """ - now = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S") - print(now + " - " + string) - - -def select_data_table(self, x_axis, y_axis, stratification="probability_threshold"): - """A method to return a dataframe which is a subset of the required performance table/ - - Args: - x_axis (str): name of x axis column - y_axis (str): name of y axis column - stratification (str, optional): stratification method. Defaults to "probability_threshold". - - Returns: - pd.DataFrame: A subset of the required performance table for plotting. - """ - which_df = ( - self.performance_table_pt - if stratification == "probability_threshold" - else self.performance_table_ppcr - ) - cols = list( - set( - [ - "Population", - "predicted_positives", - "probability_threshold", - "ppcr", - x_axis, - y_axis, - ] - ) - ) - return which_df[cols] - - -def modified_calibration_curve( - _, - reals, - probs, - n_bins=10, - strategy="quantile", -): - """A modified version of sklearn.calibration.calibration_curve - (https://scikit-learn.org/stable/modules/generated/sklearn.calibration.calibration_curve.html), - to return number over cases in each bin. - - - Args: - reals (array-like of shape (n_samples,)): True targets - probs (array-like of shape (n_samples,)): Probabilities of the positive class. - n_bins (int, optional): Number of bins to discretize the [0, 1] interval. - A bigger number requires more data. - Defaults to 10. - strategy (str, optional): Strategy used to define the widths of the bins. - uniform: The bins have identical widths. - quantile: The bins have the same number of samples and depend on probs (Default). - - Returns: - prob_true: The proportion of samples whose class is the positive class, - in each bin (fraction of positives). - prob_pred: The mean predicted probability in each bin. - bin_sums: number of cases predicted as positive in each bin. - bin_true: number of actual positive in each bin. - bin_total: number of cases in each bin. - """ - if strategy == "quantile": # Determine bin edges by distribution of data - quantiles = np.linspace(0, 1, n_bins + 1) - bins = np.percentile(probs, quantiles * 100) - elif strategy == "uniform": - bins = np.linspace(0.0, 1.0, n_bins + 1) - else: - raise ValueError( - "Invalid entry to 'strategy' input. Strategy " - "must be either 'quantile' or 'uniform'." - ) - - binids = np.searchsorted(bins[1:-1], probs) - - bin_sums = np.bincount(binids, weights=probs, minlength=len(bins)) - bin_true = np.bincount(binids, weights=reals, minlength=len(bins)) - bin_total = np.bincount(binids, minlength=len(bins)) - - nonzero = bin_total != 0 - prob_true = bin_true[nonzero] / bin_total[nonzero] - prob_pred = bin_sums[nonzero] / bin_total[nonzero] - - return ( - prob_true, - prob_pred, - bin_sums[nonzero].astype(int), - bin_true[nonzero].astype(int), - bin_total[nonzero].astype(int), - ) diff --git a/src/rtichoke/helpers/plotly_helper_functions.py b/src/rtichoke/helpers/plotly_helper_functions.py deleted file mode 100644 index 9cfe9b1..0000000 --- a/src/rtichoke/helpers/plotly_helper_functions.py +++ /dev/null @@ -1,114 +0,0 @@ -""" -A module for helpers related to plotly -""" - -import plotly.graph_objects as go - - -def create_non_interactive_curve( - performance_data_ready_for_curve, reference_group_color, reference_group -): - """ - - Parameters - ---------- - performance_data_ready_for_curve : - - reference_group_color : - - - Returns - ------- - - """ - performance_data_ready_for_curve = performance_data_ready_for_curve.dropna() - # print("Print y values non interactive") - # print(performance_data_ready_for_curve['y'].values) - # print("Done Printing non interactive") - print(reference_group) - non_interactive_curve = go.Scatter( - x=performance_data_ready_for_curve["x"].values.tolist(), - y=performance_data_ready_for_curve["y"].values.tolist(), - mode="markers+lines", - hoverinfo="text", - hovertext=performance_data_ready_for_curve["text"].values.tolist(), - name=reference_group, - legendgroup=reference_group, - line={"width": 2, "color": reference_group_color}, - ) - return non_interactive_curve - - -def create_interactive_marker( - performance_data_ready_for_curve, interactive_marker_color, k, reference_group -): - """ - - Parameters - ---------- - performance_data_ready_for_curve : - - reference_group_color : - - k : - - - Returns - ------- - - """ - performance_data_ready_for_curve = performance_data_ready_for_curve.assign( - column_name=performance_data_ready_for_curve.loc[:, "y"].fillna(-1) - ) - - # print("Print y values in k") - # print(performance_data_ready_for_curve["x"].values.tolist()[k]) - # print("Done Printing") - - # print("Print y values") - # print(performance_data_ready_for_curve['y'].values) - # print("Done Printing") - - interactive_marker = go.Scatter( - x=[performance_data_ready_for_curve["x"].values.tolist()[k]], - y=[performance_data_ready_for_curve["y"].values.tolist()[k]], - mode="markers", - hoverinfo="text", - hovertext=[performance_data_ready_for_curve["text"].values.tolist()[k]], - name=reference_group, - legendgroup=reference_group, - showlegend=False, - marker={ - "size": 12, - "color": interactive_marker_color, - "line": {"width": 2, "color": "black"}, - }, - ) - return interactive_marker - - -def create_reference_lines_for_plotly(reference_data, reference_line_color): - """Creates a plotly scatter object of the reference lines - - Parameters - ---------- - reference_data : - - reference_line_color : - - - Returns - ------- - - """ - reference_lines = go.Scatter( - x=reference_data["x"].values.tolist(), - y=reference_data["y"].values.tolist(), - mode="lines", - hoverinfo="text", - hovertext=reference_data["text"].values.tolist(), - name="reference_line", - line={"width": 2, "color": reference_line_color, "dash": "dot"}, - showlegend=False, - ) - return reference_lines diff --git a/src/rtichoke/helpers/send_post_request_to_r_rtichoke.py b/src/rtichoke/helpers/send_post_request_to_r_rtichoke.py deleted file mode 100644 index 2fdd1c7..0000000 --- a/src/rtichoke/helpers/send_post_request_to_r_rtichoke.py +++ /dev/null @@ -1,195 +0,0 @@ -""" -A module for sending post requests to rtichoke r api -""" - -import requests -import pandas as pd -from rtichoke.helpers.exported_functions import create_plotly_curve - - -def send_requests_to_rtichoke_r(dictionary_to_send, url_api, endpoint): - """Send requests to rtichoke r - - Args: - dictionary_to_send (_type_): _description_ - url_api (_type_): _description_ - endpoint (_type_): _description_ - - Returns: - _type_: _description_ - """ - rtichoke_response = requests.post(f"{url_api}{endpoint}", json=dictionary_to_send) - - return rtichoke_response - - -def create_rtichoke_curve( - probs, - reals, - by, - stratified_by, - size, - color_values=None, - url_api="http://localhost:4242/", - curve="roc", - min_p_threshold=0, - max_p_threshold=1, -): - """Create rtichoke curve - - Args: - probs (_type_): _description_ - reals (_type_): _description_ - by (_type_): _description_ - stratified_by (_type_): _description_ - size (_type_): _description_ - color_values (_type_, optional): _description_. Defaults to None. - url_api (str, optional): _description_. Defaults to "http://localhost:4242/". - curve (str, optional): _description_. Defaults to "roc". - min_p_threshold (int, optional): _description_. Defaults to 0. - max_p_threshold (int, optional): _description_. Defaults to 1. - - Returns: - _type_: _description_ - """ - if color_values is None: - color_values = [ - "#1b9e77", - "#d95f02", - "#7570b3", - "#e7298a", - "#07004D", - "#E6AB02", - "#FE5F55", - "#54494B", - "#006E90", - "#BC96E6", - "#52050A", - "#1F271B", - "#BE7C4D", - "#63768D", - "#08A045", - "#320A28", - "#82FF9E", - "#2176FF", - "#D1603D", - "#585123", - ] - - rtichoke_response = send_requests_to_rtichoke_r( - dictionary_to_send={ - "probs": probs, - "reals": reals, - "curve": curve, - "by": by, - "stratified_by": stratified_by, - "size": size, - "color_values": color_values, - "min_p_threshold": min_p_threshold, - "max_p_threshold": max_p_threshold, - }, - url_api=url_api, - endpoint="create_rtichoke_curve_list", - ) - - rtichoke_curve_list = rtichoke_response.json() - - if rtichoke_curve_list["size"][0] is None: - rtichoke_curve_list["size"] = [[None]] - - rtichoke_curve_list["reference_data"] = pd.DataFrame.from_dict( - rtichoke_curve_list["reference_data"] - ) - rtichoke_curve_list["performance_data_ready_for_curve"] = pd.DataFrame.from_dict( - rtichoke_curve_list["performance_data_ready_for_curve"] - ) - rtichoke_curve_list[ - "performance_data_for_interactive_marker" - ] = pd.DataFrame.from_dict( - rtichoke_curve_list["performance_data_for_interactive_marker"] - ) - - fig = create_plotly_curve(rtichoke_curve_list) - - return fig - - -def plot_rtichoke_curve( - performance_data, - size=None, - color_values=None, - url_api="http://localhost:4242/", - curve="roc", - min_p_threshold=0, - max_p_threshold=1, -): - """plot rtichoke curve - - Args: - performance_data (_type_): _description_ - size (_type_, optional): _description_. Defaults to None. - color_values (_type_, optional): _description_. Defaults to None. - url_api (str, optional): _description_. Defaults to "http://localhost:4242/". - curve (str, optional): _description_. Defaults to "roc". - min_p_threshold (int, optional): _description_. Defaults to 0. - max_p_threshold (int, optional): _description_. Defaults to 1. - - Returns: - _type_: _description_ - """ - if color_values is None: - color_values = [ - "#1b9e77", - "#d95f02", - "#7570b3", - "#e7298a", - "#07004D", - "#E6AB02", - "#FE5F55", - "#54494B", - "#006E90", - "#BC96E6", - "#52050A", - "#1F271B", - "#BE7C4D", - "#63768D", - "#08A045", - "#320A28", - "#82FF9E", - "#2176FF", - "#D1603D", - "#585123", - ] - rtichoke_response = send_requests_to_rtichoke_r( - dictionary_to_send={ - "performance_data": performance_data.to_json(orient="records"), - "curve": curve, - "size": size, - "color_values": color_values, - "min_p_threshold": min_p_threshold, - "max_p_threshold": max_p_threshold, - }, - url_api=url_api, - endpoint="plot_rtichoke_curve_list", - ) - - rtichoke_curve_list = rtichoke_response.json() - - if rtichoke_curve_list["size"][0] is None: - rtichoke_curve_list["size"] = [[None]] - - rtichoke_curve_list["reference_data"] = pd.DataFrame.from_dict( - rtichoke_curve_list["reference_data"] - ) - rtichoke_curve_list["performance_data_ready_for_curve"] = pd.DataFrame.from_dict( - rtichoke_curve_list["performance_data_ready_for_curve"] - ) - rtichoke_curve_list[ - "performance_data_for_interactive_marker" - ] = pd.DataFrame.from_dict( - rtichoke_curve_list["performance_data_for_interactive_marker"] - ) - - fig = create_plotly_curve(rtichoke_curve_list) - - return fig diff --git a/src/rtichoke/helpers/validations.py b/src/rtichoke/helpers/validations.py deleted file mode 100644 index 1bad522..0000000 --- a/src/rtichoke/helpers/validations.py +++ /dev/null @@ -1,133 +0,0 @@ -"""script for validating inputs to rtichoke methods""" - - -def validate_inputs(_, probs, reals): - """A mother-function to run all other validation functions - - Args: - probs (np.array): an array of probabilities - reals (np.array): an array of true values (0's or 1's) - by (float): argument to set the distance between explored threshold probabilities - stratified_by (string): must be either "probability_threshold" or "ppcr" - """ - check_probs(probs) - check_probs_vs_reals(probs, reals) - check_reals(reals) - - -def check_probs(probs): - """ - Validate probs by ensuring all values are between [0, 1] - - Args: - probs (np.array): an array of probabilities - - Raises: - ValueError: when validation fails - - Returns: - Boolean: True when validation passed (else raise exception) - """ - if min(probs) < 0 or max(probs) > 1: - raise ValueError("Probs must be within [0, 1]") - - -def check_probs_vs_reals(probs, reals): - """ - Validate probs vs. reals: - 1. probs and reals must have the same shape - 2. at least two values should be included in each array - - Args: - probs (np.array): an array of probabilities - reals (np.array): an array of true values (0's or 1's) - - Raises: - ValueError: when either validation fails - - Returns: - Boolean: True when validation passed (else raise exception) - """ - if probs.shape != reals.shape: - raise ValueError( - f"Probs and reals shapes are inconsistent ({probs.shape} and {reals.shape})" - ) - if len(probs) < 2: - raise ValueError("At least two entries should be included reals and probs") - - -def check_reals(reals): - """ - Validate reals consist of only 0's and 1's, including positive and negative examples - - Args: - reals (np.array): an array of true values (0's or 1's) - - Raises: - ValueError: when validation fails - - Returns: - Boolean: True when validation passed (else raise exception) - """ - if set(reals) != {0, 1}: - raise ValueError("Reals must include only 0's and 1's") - - -def check_by(self): - """ - Validate `by` argument is between 0 and 0.5 - - Args: - by (float): argument to set the distance between explored threshold probabilities - - Raises: - ValueError: when validation fails - - Returns: - Boolean: True when validation passed (else raise exception) - """ - if not (isinstance(self.by, float) and (self.by > 0) and (self.by <= 0.5)): - raise ValueError("Argument `by` must be a float, 0 > `by` <= 0.5") - - -def validate_plot_inputs(_, curve_type, stratification): - """This function runs child validation functions to ensure proper plotting inputs - - Args: - curve_type (str): Available plots: "ROC", "LIFT", "PR", "NB", or "calibration" - stratification (str, optional): Stratifiction method ("PPCR" or "probability_threshold"). - Defaults to "probability_threshold". - """ - check_plot_curve_type(curve_type) - check_plot_stratification(stratification) - - -def check_plot_curve_type(curve_type): - """A method to verify requested curve_type is available. - - Args: - curve_type (str): Available plots: "ROC", "LIFT", "PR", "NB", or "calibration" - - Raises: - ValueError: when `curve_type` is not one of the available plots. - """ - available_plots = ["ROC", "LIFT", "PR", "NB", "calibration"] - if curve_type not in available_plots: - raise ValueError( - f"curve_type {curve_type} not recognized. Supported curves :{available_plots}" - ) - - -def check_plot_stratification(stratification): - """A method to verify stratification method - - Args: - stratification (str, optional): Stratifiction method ("PPCR" or "probability_threshold") - - Raises: - ValueError: when `stratification` is not "PPCR" or "probability_threshold". - """ - if stratification not in ["probability_threshold", "ppcr"]: - raise ValueError( - "stratification has to be wither probability_threshold or ppcr" - ) diff --git a/src/rtichoke/performance_data/__init__.py b/src/rtichoke/performance_data/__init__.py deleted file mode 100644 index 31493a7..0000000 --- a/src/rtichoke/performance_data/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Subpackage for Performance Data -""" diff --git a/src/rtichoke/performance_data/performance_data.py b/src/rtichoke/performance_data/performance_data.py deleted file mode 100644 index 5a8f72d..0000000 --- a/src/rtichoke/performance_data/performance_data.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -A module for Performance Data -""" - -from typing import Dict, List -from pandas.core.frame import DataFrame -import pandas as pd -from rtichoke.helpers.send_post_request_to_r_rtichoke import send_requests_to_rtichoke_r - - -def prepare_performance_data( - probs: Dict[str, List[float]], - reals: Dict[str, List[int]], - stratified_by: str = "probability_threshold", - url_api: str = "http://localhost:4242/", -) -> DataFrame: - """Prepare Performance Data - - Args: - probs (Dict[str, List[float]]): _description_ - reals (Dict[str, List[int]]): _description_ - stratified_by (str, optional): _description_. Defaults to "probability_threshold". - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - DataFrame: _description_ - """ - rtichoke_response = send_requests_to_rtichoke_r( - dictionary_to_send={ - "probs": probs, - "reals": reals, - "stratified_by": stratified_by, - }, - url_api=url_api, - endpoint="prepare_performance_data", - ) - - performance_data = pd.DataFrame( - rtichoke_response.json(), columns=list(rtichoke_response.json()[0].keys()) - ) - return performance_data diff --git a/src/rtichoke/performance_data/prepare_calibration_data.py b/src/rtichoke/performance_data/prepare_calibration_data.py deleted file mode 100644 index 09e36f2..0000000 --- a/src/rtichoke/performance_data/prepare_calibration_data.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Functions to create data for calibration plots""" - -import numpy as np -import pandas as pd - - -def prepare_calibration_data(self, n_bins, strategy): - """ - User's function to produce performance data table for probs/reals. - probs/reals may represent one probs vs. one reals, several probs vs. one real, - or several probs vs. several reals. - - Args: - probs (list, np.array, pd.Series, or dict): - an array of probabilities or a dictionary {'pop_name': array of probabilities} - reals (list, np.array, pd.Series, or dict): - an array of binary results or a dictionary {'pop_name': arary of binary results} - - Returns: - pd.DataFrame: a dataframe with performance metrics - """ - if isinstance(self.probs, dict) and isinstance(self.reals, dict): - assert ( - self.probs.keys() == self.reals.keys() - ), "When sending dictionaries, probs and reals must have the same keys" - - return pd.concat( - [ - self.prepare_calibration_table( - probs=self.probs[key], - reals=self.reals[key], - n_bins=n_bins, - strategy=strategy, - pop_name=key, - ) - for key in self.probs.keys() - ] - ) - - if isinstance(self.probs, dict) and isinstance( - self.reals, (list, np.ndarray, pd.Series) - ): - return pd.concat( - [ - self.prepare_calibration_table( - probs=self.probs[key], - reals=self.reals, - n_bins=n_bins, - strategy=strategy, - pop_name=key, - ) - for key in self.probs.keys() - ] - ) - - if isinstance(self.probs, (list, np.ndarray, pd.Series)) and isinstance( - self.reals, (list, np.ndarray, pd.Series) - ): - return self.prepare_calibration_table( - probs=self.probs, - reals=self.reals, - n_bins=n_bins, - strategy=strategy, - ) - - raise ValueError("Wrong inputs provided for probs and reals") - - -def prepare_calibration_table(self, probs, reals, n_bins, strategy, pop_name="pop1"): - """Generate calibration data table for a single set of probs and reals. - - Args: - probs (list, np.array, pd.Series): an array of probabilities - reals (list, np.array, pd.Series): an array of true values (0's or 1's) - pop_name (str, optional): A population name, when asking for performance - metrics for several populations. - Defaults to 'pop1'. - - Returns: - pd.DataFrame: a dataframe with calibration data - """ - - # convert inputs to np.arrays - probs = np.array(probs) - reals = np.array(reals) - - # verify inputs - self.validate_inputs(probs, reals) - ( - prob_true, - prob_pred, - pred_pos, - actual_pos, - total_cases, - ) = self.modified_calibration_curve( - reals=reals, probs=probs, n_bins=n_bins, strategy=strategy - ) - - return pd.DataFrame( - { - "Population": [pop_name] * len(prob_true), - "prob_true": prob_true, - "prob_pred": prob_pred, - "pred_pos": pred_pos, - "actual_pos": actual_pos, - "total_cases": total_cases, - "total_N": [self.N[pop_name]] * len(prob_true), - } - ) diff --git a/src/rtichoke/performance_data/prepare_performance_data.py b/src/rtichoke/performance_data/prepare_performance_data.py deleted file mode 100644 index 112dab6..0000000 --- a/src/rtichoke/performance_data/prepare_performance_data.py +++ /dev/null @@ -1,168 +0,0 @@ -"""Functions to create performance data tables""" - -import numpy as np -import pandas as pd -from sklearn.metrics import confusion_matrix -from tqdm import tqdm - - -def prepare_performance_data(self, stratified_by): - """ - User's function to produce performance data table for probs/reals. - probs/reals may represent one probs vs. one reals, several probs vs. one real, - or several probs vs. several reals. - - Args: - probs (list, np.array, pd.Series, or dict): an array of probabilities or a dictionary - {'pop_name': array of probabilities} - reals (list, np.array, pd.Series, or dict): an array of binary results or a dictionary - {'pop_name': arary of binary results} - by (float, optional): argument to set the distance between explored threshold probabilities. - Defaults to 0.01. - stratified_by (string, optional): must be either "probability_threshold" or "ppcr". - Defaults to "probability_threshold". - - Returns: - pd.DataFrame: a dataframe with performance metrics - """ - if isinstance(self.probs, dict) and isinstance(self.reals, dict): - assert ( - self.probs.keys() == self.reals.keys() - ), "When sending dictionaries, probs and reals must have the same keys" - - return pd.concat( - [ - self.prepare_performance_table( - probs=self.probs[key], - reals=self.reals[key], - by=self.by, - stratified_by=stratified_by, - pop_name=key, - ) - for key in self.probs.keys() - ] - ) - - if isinstance(self.probs, dict) and isinstance( - self.reals, (list, np.ndarray, pd.Series) - ): - return pd.concat( - [ - self.prepare_performance_table( - probs=self.probs[key], - reals=self.reals, - by=self.by, - stratified_by=stratified_by, - pop_name=key, - ) - for key in self.probs.keys() - ] - ) - - if isinstance(self.probs, (list, np.ndarray, pd.Series)) and isinstance( - self.reals, (list, np.ndarray, pd.Series) - ): - return self.prepare_performance_table( - probs=self.probs, reals=self.reals, by=self.by, stratified_by=stratified_by - ) - - raise ValueError("Wrong inputs provided for probs and reals") - - -def prepare_performance_table(self, probs, reals, by, stratified_by, pop_name="pop1"): - """Generate performance table for a single set of probs and reals. - - Args: - probs (list, np.array, pd.Series): an array of probabilities - reals (list, np.array, pd.Series): an array of true values (0's or 1's) - by (float, optional): argument to set the distance between explored - threshold probabilities. Defaults to 0.01. - stratified_by (string, optional): must be either "probability_threshold" or "ppcr". - Defaults to "probability_threshold". - pop_name (str, optional): A population name, when asking for performance - metrics for several populations. Defaults to 'pop1'. - - Returns: - pd.DataFrame: a dataframe with performance metrics - """ - - # update prevalence and N: - self.prevalence.update({pop_name: reals.mean()}) - self.N.update({pop_name: len(reals)}) - - # convert inputs to np.arrays - probs = np.array(probs) - reals = np.array(reals) - - # verify inputs - self.validate_inputs(probs, reals) - # decimals = len(str(self.by).split(".")[1].rstrip("0")) - - # define probabilty thresholds - prob_thresholds = np.append(np.arange(0, 1, by), 1) - - # if ppcr is required, adjust probability threholds accordingly. - if stratified_by == "ppcr": - ppcr = np.append(np.arange(0, 1, by), 1) - prob_thresholds = np.array([np.quantile(probs, p) for p in prob_thresholds]) - prob_thresholds[0] = 0.0 - else: - ppcr = [] - - # define performance table - performance_table = { - "Population": [], - "probability_threshold": prob_thresholds, - "ppcr": ppcr, - "predicted_positives": [], - "TP": [], - "FP": [], - "FN": [], - "TN": [], - } - - # run over all probability thresholds and calculate confusion matrix - for p in tqdm( - prob_thresholds, desc="Calculating performance data", leave=False, delay=0.5 - ): - preds = (probs > p).astype(int) - if stratified_by == "probability_threshold": - performance_table["ppcr"].append(preds.mean()) - performance_table["predicted_positives"].append(preds.sum()) - - tn, fp, fn, tp = confusion_matrix(reals, preds).ravel() - performance_table["TP"].append(tp) - performance_table["FP"].append(fp) - performance_table["FN"].append(fn) - performance_table["TN"].append(tn) - - # define additional metrics - performance_table["Population"] = [pop_name] * len(prob_thresholds) - performance_table = pd.DataFrame(performance_table) - performance_table["Sensitivity"] = performance_table["TP"] / ( - performance_table["TP"] + performance_table["FN"] - ) - performance_table["Specificity"] = performance_table["TN"] / ( - performance_table["TN"] + performance_table["FP"] - ) - performance_table["FPR"] = 1 - performance_table["Specificity"] - - performance_table["PPV"] = ( - performance_table["TP"] / (performance_table["TP"] + performance_table["FP"]) - ).fillna(1.0) - performance_table["NPV"] = performance_table["TN"] / ( - performance_table["TN"] + performance_table["FN"] - ) - performance_table["lift"] = performance_table["PPV"] / reals.mean() - performance_table["Net_benefit"] = performance_table[ - "Sensitivity" - ] * reals.mean() - (1 - performance_table["Specificity"]) * (1 - reals.mean()) * ( - performance_table["probability_threshold"] - / (1 - performance_table["probability_threshold"]) - ) - - return ( - performance_table - if stratified_by == "probability_threshold" - else performance_table.iloc[::-1] - ) diff --git a/src/rtichoke/plot/__init__.py b/src/rtichoke/plot/__init__.py deleted file mode 100644 index 9e52288..0000000 --- a/src/rtichoke/plot/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Subpackage for plotting -""" diff --git a/src/rtichoke/plot/bokeh/__init__.py b/src/rtichoke/plot/bokeh/__init__.py deleted file mode 100644 index 172ef19..0000000 --- a/src/rtichoke/plot/bokeh/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Subpackage for bokeh plotting -""" diff --git a/src/rtichoke/plot/bokeh/create_bokeh_plot_dict.py b/src/rtichoke/plot/bokeh/create_bokeh_plot_dict.py deleted file mode 100644 index 388ea4e..0000000 --- a/src/rtichoke/plot/bokeh/create_bokeh_plot_dict.py +++ /dev/null @@ -1,111 +0,0 @@ -"""Helper functions for Bokeh plotting""" -from bokeh.palettes import Spectral11 as palette -from bokeh.models import CategoricalColorMapper -from bokeh.models import CustomJS - - -def create_JS_code(x, y, stratification): - """A method to return JS code to bokeh's plots""" - js_code = f"""const source_data = source.data; - const pop = source_data.Population; - const x = source_data.{x}; - const y = source_data.{y}; - const probability_threshold = source_data.probability_threshold; - const predicted_positives = source_data.predicted_positives; - const ppcr = source_data.ppcr; - const pt = val.value; - - const j = val.step.toString().split('.')[1].length; - const filter = source_data.{stratification}.map((num) => num.toFixed(j) == pt.toFixed(j)); - - scatter_source.data = {{ - Population: pop.filter((_, i) => filter[i]), - {x}: x.filter((_, i) => filter[i]), - {y}: y.filter((_, i) => filter[i]), - probability_threshold: probability_threshold.filter((_, i) => filter[i]), - ppcr: ppcr.filter((_, i) => filter[i]), - predicted_positives: predicted_positives.filter((_, i) => filter[i]) - }}; - - scatter_source.change.emit(); - """ - return js_code - - -_legend_positions = { - "ROC": "bottom_right", - "LIFT": "top_right", - "gains": "top_right", - "PR": "top_right", - "NB": "bottom_left", - "calibration": "top_left", -} -_generic_hover = [ - ("Dataset", "@Population"), - ("Prob. threshold", "@probability_threshold{0.000}"), - ("PPCR", "@ppcr{0.000}"), - ("Predicted positive", "@predicted_positives"), -] - - -def create_bokeh_plot_dict(bokeh_plot_dict): - """Function to turn generic_plot_dict into bokeh_plot_dict - - Args: - bokeh_plot_dict (dict): generic plot dict - - Returns: - dict: plot dict for Bokeh's interface - """ - curve_type = bokeh_plot_dict["curve_type"] - - bokeh_plot_dict["legend"] = _legend_positions[curve_type] - - if curve_type != "calibration": - specific_hover_info = [ - (l, "@" + l + "{0.000}") for l in bokeh_plot_dict["hover_info"] - ] - bokeh_plot_dict["hover_info"] = _generic_hover + specific_hover_info - else: - bokeh_plot_dict["hover_info"] = [ - ("Dataset", "@Population"), - ("Predicted", "@prob_pred{0.000} (@pred_pos / @total_cases)"), - ("Observed", "@prob_true{0.000} (@actual_pos / @total_cases)"), - ] - return bokeh_plot_dict - - -def create_pops_and_colors(df): - """Function to provide each population in the performance data table a unique color - - Args: - df (pd.DataFrame): Performance data table - - Returns: - pops: array of populations names - colors: list of colors - color_map: Bokeh's color map - """ - pops = df.Population.unique() - colors = palette[0 : len(pops)] - color_map = CategoricalColorMapper(factors=pops, palette=palette) - - return pops, colors, color_map - - -def link_legend_glyphs(ref_fig, target_figs): - """Function to link legends in Bokeh""" - cb = CustomJS( - args={"target_figs": target_figs}, - code=""" - const glyph_name = cb_obj.name; - for (const element of target_figs) { - const r = element.select(name = glyph_name)[0]; - //r.muted = cb_obj.muted; - r.visible = cb_obj.visible; - } - """, - ) - for i in ref_fig.legend[0].items: - r = i.renderers[0] - r.js_on_change("visible", cb) diff --git a/src/rtichoke/plot/bokeh/plot_bokeh.py b/src/rtichoke/plot/bokeh/plot_bokeh.py deleted file mode 100644 index 0aa69ac..0000000 --- a/src/rtichoke/plot/bokeh/plot_bokeh.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Plotting sub-module for Bokeh interface""" - -from bokeh.layouts import column, gridplot -from bokeh.models import ( - ColumnDataSource, - Slider, - CustomJS, - HoverTool, - CDSView, - GroupFilter, - BoxZoomTool, -) -from bokeh.plotting import figure, output_file, save, show -import numpy as np - -# from collections import OrderedDict -from .create_bokeh_plot_dict import ( - create_JS_code, - create_bokeh_plot_dict, - create_pops_and_colors, - link_legend_glyphs, -) - - -def plot_bokeh(self, generic_plot_dict, filename=None): - """Main method to plot using Bokeh interface - - Args: - generic_plot_dict (dict): A generic (all-interface) dict containing plotting data - filename (str, optional): Filename to save plot as html. Defaults to None. - """ - curve_type = generic_plot_dict["curve_type"] - - if curve_type == "calibration": - plot_bokeh_calibration(self, generic_plot_dict, filename=filename) - return None - - stratification = generic_plot_dict["stratification"] - - graph_meta = create_bokeh_plot_dict(generic_plot_dict) - x = graph_meta["x"] - y = graph_meta["y"] - df = self.select_data_table(x=x, y=y, stratification=stratification) - pops, colors, color_map = create_pops_and_colors(df) - - # Create plots and widgets - if filename: - output_file(filename) - - plot = figure( - title=f"{curve_type} curve", - tools="box_zoom", - ) - - lines_source = [] - lines = [] - for j, pop in enumerate(pops): - lines_source.append(ColumnDataSource(data=df[df["Population"] == pop])) - - lines.append( - plot.line( - x=x, - y=y, - line_color=colors[j], - legend_label=pop, - name=pop, - line_width=3, - line_alpha=0.5, - source=lines_source[j], - ) - ) - - plot.circle( - x=x, - y=y, - size=3, - color=colors[j], - fill_alpha=0.25, - source=lines_source[j], - legend_label=pop, - ) - - plot.legend.location = graph_meta["legend"] - plot.legend.click_policy = "hide" - - # add hover data - hover = HoverTool( - renderers=lines, - tooltips=graph_meta["hover_info"], - ) - plot.add_tools(hover) - - plot.line( - graph_meta["reference"]["x"], - graph_meta["reference"]["y"], - line_width=1, - color="gray", - ) - - # Create Slider object - slider = Slider( - start=0, - end=1, - value=df.dropna()[stratification].min(), - step=self.by, - title="Prob. threshold" - if stratification == "probability_threshold" - else "PPCR", - ) - - # add scatter - filtered_scatter = [th == slider.value for th in df[stratification]] - scatter_source = ColumnDataSource(data=df.loc[filtered_scatter]) - - for j, pop in enumerate(pops): - view = CDSView(filter=GroupFilter(column_name="Population", group=pop)) - - plot.circle( - x=x, - y=y, - source=scatter_source, - size=11, - fill_alpha=0.85, - line_color="black", - color={"field": "Population", "transform": color_map}, - view=view, - legend_label=pop, - ) - - # Adding callback code - source = ColumnDataSource(data=df.set_index("Population")) - callback = CustomJS( - args={"source": source, "scatter_source": scatter_source, "val": slider}, - code=create_JS_code(x=x, y=y, stratification=stratification), - ) - - slider.js_on_change("value", callback) - - # customize plots and finish - plot.xaxis.axis_label = graph_meta["xlabel"] - plot.yaxis.axis_label = graph_meta["ylabel"] - layout = column(slider, plot) - - if filename: - save(layout) - else: - show(layout) - - -def plot_bokeh_calibration(self, generic_plot_dict, filename=None): - """Main method to plot calibration curve using Bokeh - - Args: - generic_plot_dict (dict): A generic (all-interface) dict containing plotting data - filename (str, optional): Filename to save plot as html. Defaults to None. - """ - - curve_type = "calibration" - graph_meta = create_bokeh_plot_dict(generic_plot_dict) - x = graph_meta["x"] - y = graph_meta["y"] - df = self.calibration_table - pops, colors, _ = create_pops_and_colors(df) - - # Create plots and widgets - if filename: - output_file(filename) - - plot = figure( - title=f"{curve_type} curve", - tools=[BoxZoomTool(match_aspect=True)], - ) - - histogram = figure(aspect_ratio=3.5, tools=plot.tools, x_range=plot.x_range) - - lines_source = [] - lines = [] - circles = [] - histograms = [] - - for j, pop in enumerate(pops): - lines_source.append(ColumnDataSource(data=df[df["Population"] == pop])) - lines.append( - plot.line( - x=x, - y=y, - line_color=colors[j], - legend_label=pop, - name=pop, - line_width=3, - line_alpha=0.75, - source=lines_source[j], - ) - ) - circles.append( - plot.circle( - x=x, - y=y, - line_color="black", - fill_color=colors[j], - fill_alpha=0.5, - size=8, - legend_label=pop, - source=lines_source[j], - ) - ) - - probs = self.probs[pop] if pop in self.probs.keys() else self.probs - hist, edges = np.histogram(probs, bins=np.linspace(0, 1, 101)) - - histograms.append( - histogram.quad( - top=hist, - bottom=0, - left=edges[:-1], - right=edges[1:], - line_color="black", - alpha=0.5, - name=pop, - fill_color=colors[j], - ) - ) - - plot.line( - graph_meta["reference"]["x"], - graph_meta["reference"]["y"], - line_width=1, - color="gray", - ) - - plot.legend.location = graph_meta["legend"] - plot.legend.click_policy = "hide" - - # add hover data - hover = HoverTool( - renderers=circles, - tooltips=graph_meta["hover_info"], - ) - plot.add_tools(hover) - - # customize plots and finish - plot.xaxis.axis_label = graph_meta["xlabel"] - plot.yaxis.axis_label = graph_meta["ylabel"] - - link_legend_glyphs(plot, [histogram]) - layout = gridplot([[plot], [histogram]]) - - if filename: - save(layout) - else: - show(layout) diff --git a/src/rtichoke/plot/create_generic_plot_dict.py b/src/rtichoke/plot/create_generic_plot_dict.py deleted file mode 100644 index 53e238a..0000000 --- a/src/rtichoke/plot/create_generic_plot_dict.py +++ /dev/null @@ -1,79 +0,0 @@ -""" Generic plotting dict""" - -plot_dicts = { - "ROC": { - "x": "FPR", - "y": "Sensitivity", - "reference": {"x": [0, 1], "y": [0, 1]}, - "xlabel": "1-Specificity", - "ylabel": "Sensitivity", - "hover_info": [ - "FPR", - "Sensitivity", - ], - }, - "LIFT": { - "x": "ppcr", - "y": "lift", - "reference": {"x": [0, 1], "y": [1, 1]}, - "xlabel": "ppcr", - "ylabel": "lift", - "hover_info": [ - "lift", - ], - }, - "gains": { - "x": "ppcr", - "y": "Sensitivity", - "reference": {"x": [0, 1], "y": [1, 1]}, - "xlabel": "ppcr", - "ylabel": "Sensitivity", - "hover_info": [ - "Sensitivity", - ], - }, - "PR": { - "x": "PPV", - "y": "Sensitivity", - "reference": {"x": [0, 0], "y": [0, 0]}, - "xlabel": "Precision", - "ylabel": "Recall", - "hover_info": [ - "PPV", - "Sensitivity", - ], - }, - "NB": { - "x": "probability_threshold", - "y": "Net_benefit", - "reference": {"x": [0, 0], "y": [0, 0]}, - "xlabel": "Prob. threshold", - "ylabel": "Net Benefit", - "hover_info": [ - "Net_benefit", - ], - }, - "calibration": { - "x": "prob_pred", - "y": "prob_true", - "reference": {"x": [0, 1], "y": [0, 1]}, - "xlabel": "Predicted", - "ylabel": "Observed", - }, -} - - -def create_generic_plot_dict(curve_type, stratification): - """returns a generic plot dict, according to curve_type and stratification method - - Args: - curve_type (str): One of the available curve types - stratification (str): stratification method - - Returns: - dictionary: generic plot dictionary to enable plotting - """ - generic_plot_dict = plot_dicts[curve_type] - generic_plot_dict["curve_type"] = curve_type - generic_plot_dict["stratification"] = stratification - return generic_plot_dict diff --git a/src/rtichoke/plot/plotting.py b/src/rtichoke/plot/plotting.py deleted file mode 100644 index 3c1f422..0000000 --- a/src/rtichoke/plot/plotting.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Plotting module for rtichoke""" - -from .bokeh.plot_bokeh import plot_bokeh -from .create_generic_plot_dict import create_generic_plot_dict - - -def plot( - self, curve_type, stratification="probability_threshold", filename=None, api="bokeh" -): - """A method to fetch generic plot dict and call the specific plotting API - - Args: - curve_type (str): Curve type to produce ("ROC", "LIFT", "PR", "NB", or "calibration") - stratification (str, optional): Stratifiction method ("PPCR" or "probability_threshold"). - Defaults to "probability_threshold". - filename (str, optional): Filename for saving plot. Defaults to None. - api (str, optional): Which plotting API to use. Currently only "bokeh" is available. - Defaults to "bokeh". - - Returns: plot object - """ - self.validate_plot_inputs(curve_type, stratification) - - generic_plot_dict = create_generic_plot_dict(curve_type, stratification) - - if api == "bokeh": - return plot_bokeh(self, generic_plot_dict, filename) diff --git a/src/rtichoke/rtichoke.py b/src/rtichoke/rtichoke.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/rtichoke/summary_report/__init__.py b/src/rtichoke/summary_report/__init__.py deleted file mode 100644 index 87ebcbd..0000000 --- a/src/rtichoke/summary_report/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Subpackage for Summary Report -""" diff --git a/src/rtichoke/summary_report/summary_report.py b/src/rtichoke/summary_report/summary_report.py deleted file mode 100644 index 66777c7..0000000 --- a/src/rtichoke/summary_report/summary_report.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -A module for Summary Report -""" - -from rtichoke.helpers.send_post_request_to_r_rtichoke import send_requests_to_rtichoke_r - - -def create_summary_report(probs, reals, url_api="http://localhost:4242/"): - """Create rtichoke Summary Report - - Args: - probs (_type_): _description_ - reals (_type_): _description_ - url_api (str, optional): _description_. Defaults to "http://localhost:4242/". - """ - rtichoke_response = send_requests_to_rtichoke_r( - dictionary_to_send={"probs": probs, "reals": reals}, - url_api=url_api, - endpoint="create_summary_report", - ) - print(rtichoke_response.json()[0].keys()) diff --git a/src/rtichoke/utility/__init__.py b/src/rtichoke/utility/__init__.py deleted file mode 100644 index b19d4f1..0000000 --- a/src/rtichoke/utility/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Subpackage for Decision Curves -""" diff --git a/src/rtichoke/utility/decision.py b/src/rtichoke/utility/decision.py deleted file mode 100644 index 4de7d0e..0000000 --- a/src/rtichoke/utility/decision.py +++ /dev/null @@ -1,98 +0,0 @@ -""" -A module for Summary Report -""" - -from typing import Dict, List, Optional -from pandas.core.frame import DataFrame -from plotly.graph_objs._figure import Figure -from rtichoke.helpers.send_post_request_to_r_rtichoke import create_rtichoke_curve -from rtichoke.helpers.send_post_request_to_r_rtichoke import plot_rtichoke_curve - - -def create_decision_curve( - probs: Dict[str, List[float]], - reals: Dict[str, List[int]], - decision_type: str = "conventional", - min_p_threshold: float = 0, - max_p_threshold: float = 1, - by: float = 0.01, - stratified_by: str = "probability_threshold", - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Create Decision Curve - - Args: - probs (Dict[str, List[float]]): _description_ - reals (Dict[str, List[int]]): _description_ - decision_type (str, optional): _description_. Defaults to "conventional". - min_p_threshold (float, optional): _description_. Defaults to 0. - max_p_threshold (float, optional): _description_. Defaults to 1. - by (float, optional): _description_. Defaults to 0.01. - stratified_by (str, optional): _description_. Defaults to "probability_threshold". - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - if decision_type == "conventional": - curve = "decision" - else: - curve = "interventions avoided" - - fig = create_rtichoke_curve( - probs, - reals, - by=by, - stratified_by=stratified_by, - size=size, - color_values=color_values, - url_api=url_api, - curve=curve, - min_p_threshold=min_p_threshold, - max_p_threshold=max_p_threshold, - ) - return fig - - -def plot_decision_curve( - performance_data: DataFrame, - decision_type: str, - min_p_threshold: int = 0, - max_p_threshold: int = 1, - size: Optional[int] = None, - color_values: List[str] = None, - url_api: str = "http://localhost:4242/", -) -> Figure: - """Plot Decision Curve - - Args: - performance_data (DataFrame): _description_ - decision_type (str): _description_ - min_p_threshold (int, optional): _description_. Defaults to 0. - max_p_threshold (int, optional): _description_. Defaults to 1. - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - if decision_type == "conventional": - curve = "decision" - else: - curve = "interventions avoided" - - fig = plot_rtichoke_curve( - performance_data, - size=size, - color_values=color_values, - url_api=url_api, - curve=curve, - min_p_threshold=min_p_threshold, - max_p_threshold=max_p_threshold, - ) - return fig diff --git a/src/tests/__init__.py b/src/tests/__init__.py deleted file mode 100644 index c3a79ad..0000000 --- a/src/tests/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -""" -testing module - -""" diff --git a/src/tests/test_calculate_performance_data.py b/src/tests/test_calculate_performance_data.py deleted file mode 100644 index c002533..0000000 --- a/src/tests/test_calculate_performance_data.py +++ /dev/null @@ -1,95 +0,0 @@ -"""UnitTest class to test performance tables""" -import unittest -import numpy as np -import pandas as pd -from numpy.testing import assert_allclose -from rtichoke import Rtichoke - -# UnitTests created with ChatGPT! :-) - - -class TestPreparePerformanceTable(unittest.TestCase): - """UnitTest class to test performance tables""" - - def setUp(self): - """Simulate an Rtichoke object""" - probs = {"pop1": np.array([0.7, 0.8, 0.9, 0.4, 0.2, 0.6, 0.5])} - reals = {"pop1": np.array([1, 1, 1, 0, 0, 1, 0])} - self.r = Rtichoke(probs=probs, reals=reals, by=0.1) - - def test_performance_table_type_and_size(self): - """Test performance table type and size""" - self.assertIsInstance(self.r.performance_table_pt, pd.DataFrame) - self.assertIsInstance(self.r.performance_table_ppcr, pd.DataFrame) - - self.assertEqual(self.r.performance_table_pt.shape, (11, 15)) - self.assertEqual(self.r.performance_table_ppcr.shape, (11, 15)) - - def test_performance_data_contains_expected_columns(self): - """Test whether performance table contains expected columns""" - result_cols_pt = self.r.performance_table_pt.columns - result_cols_ppcr = self.r.performance_table_ppcr.columns - - expected_cols = [ - "Population", - "probability_threshold", - "ppcr", - "predicted_positives", - "TP", - "FP", - "FN", - "TN", - "Sensitivity", - "Specificity", - "FPR", - "PPV", - "NPV", - "lift", - "Net_benefit", - ] - self.assertCountEqual(result_cols_pt, expected_cols) - self.assertCountEqual(result_cols_ppcr, expected_cols) - - def test_performance_data_contains_correct_thresholds(self): - """Test thresholds within performance data table""" - expected_thresholds = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] - result_thresholds_pt = self.r.performance_table_pt["probability_threshold"] - result_thresholds_ppcr = self.r.performance_table_ppcr["ppcr"][-1::-1] - - assert_allclose(result_thresholds_pt, expected_thresholds) - assert_allclose(result_thresholds_ppcr, expected_thresholds) - - def test_performance_data_contains_expected_population_name(self): - """Test populations within performance data table""" - expected_pop_name = ["pop1"] - pt_table_pop_name = self.r.performance_table_pt["Population"].unique() - ppcr_table_pop_name = self.r.performance_table_ppcr["Population"].unique() - - self.assertEqual(expected_pop_name, pt_table_pop_name) - self.assertEqual(expected_pop_name, ppcr_table_pop_name) - - def test_performance_data_with_two_populations(self): - """Test performance data table creation on two populations""" - probs = { - "pop1": np.array([0.7, 0.8, 0.9, 0.4, 0.2, 0.6, 0.5]), - "pop2": np.array([0.7, 0.8, 0.9, 0.4, 0.2, 0.6, 0.5]), - } - reals = { - "pop1": np.array([1, 1, 1, 0, 0, 1, 0]), - "pop2": np.array([1, 1, 1, 0, 0, 1, 0]), - } - r = Rtichoke(probs=probs, reals=reals, by=0.1) - - expected_pop_name = ["pop1", "pop2"] - pt_table_pop_name = r.performance_table_pt["Population"].unique() - ppcr_table_pop_name = r.performance_table_ppcr["Population"].unique() - - self.assertEqual(expected_pop_name, pt_table_pop_name) - self.assertEqual(expected_pop_name, ppcr_table_pop_name) - - self.assertEqual(r.performance_table_pt.shape, (15, 22)) - self.assertEqual(r.performance_table_ppcr.shape, (15, 22)) - - -if __name__ == "__main__": - unittest.main() diff --git a/src/tests/test_vericiation_tests.py b/src/tests/test_vericiation_tests.py deleted file mode 100644 index bd402d4..0000000 --- a/src/tests/test_vericiation_tests.py +++ /dev/null @@ -1,155 +0,0 @@ -"""Sub-module to test Rtichoke validations""" -import unittest -import numpy as np -from rtichoke.helpers.validations import check_probs, check_probs_vs_reals, check_reals -from rtichoke import Rtichoke - - -class TestCheckProbs(unittest.TestCase): - """UnitTest to test check_probs method""" - - def test_valid_probs(self): - """Testing valid probs""" - try: - check_probs(np.array([0.1, 0.3, 0.6, 0.9])) - except ValueError as e: - self.fail(f"Should return None when OK: {e}") - - def test_invalid_probs_low(self): - """Test check_probs with negative probability""" - with self.assertRaises(ValueError): - check_probs(np.array([-0.1, 0.3, 0.6, 0.9])) - - def test_invalid_probs_high(self): - """Test check_probs with probability > 1.0""" - with self.assertRaises(ValueError): - check_probs(np.array([0.1, 0.3, 1.1, 0.9])) - - -### - - -class TestCheckProbsVsReals(unittest.TestCase): - """UnitTest class to test check_probs_vs_reals""" - - def test_valid_probs_vs_reals(self): - """Test check_probs_vs_reals: valid probs and reals""" - try: - probs = np.array([0.1, 0.3, 0.6, 0.9]) - reals = np.array([0, 1, 0, 1]) - check_probs_vs_reals(probs, reals) - except ValueError as e: - self.fail(f"Should return None when OK: {e}") - - def test_invalid_shape(self): - """Test check_probs_vs_reals: shape mis-match""" - probs = np.array([0.1, 0.3, 0.6, 0.9]) - reals = np.array([0, 1]) - with self.assertRaises(ValueError): - check_probs_vs_reals(probs, reals) - - def test_invalid_len(self): - """Test check_probs_vs_reals: single observation""" - probs = np.array([0.1]) - reals = np.array([0]) - with self.assertRaises(ValueError): - check_probs_vs_reals(probs, reals) - - -class TestCheckReals(unittest.TestCase): - """UnitTest class to test check_reals""" - - def test_valid_reals(self): - """Test check_reals: valid input""" - # Test a valid input - try: - reals = np.array([0, 1, 1, 0, 1]) - check_reals(reals) - except ValueError as e: - self.fail(f"Should return None when OK: {e}") - - def test_reals_with_negative_values(self): - """Test check_reals: negative reals""" - # Test that an exception is raised when reals include negative values - reals = np.array([-1, 0, 1]) - with self.assertRaises(ValueError): - check_reals(reals) - - def test_reals_with_a_single_value(self): - """Test check_reals: a single unique reals value""" - # Test that an exception is raised when reals include only positive outcomes - reals = np.array([1, 1, 1]) - with self.assertRaises(ValueError): - check_reals(reals) - - def test_reals_with_non_binary_values(self): - """Test check_reals: multi-nomial reals""" - # Test that an exception is raised when reals include non-binary values - reals = np.array([0, 1, 2]) - with self.assertRaises(ValueError): - check_reals(reals) - - -class TestCheckBy(unittest.TestCase): - """UnitTest class to test check_by""" - - def test_valid_by_value(self): - """Test check_by: valid value""" - Rtichoke(probs=np.array([0.1, 0.5, 0.9]), reals=np.array([0, 1, 0]), by=0.25) - - def test_by_not_a_float(self): - """Test check_by: `by` argument not a float""" - # Test that an exception is raised when the input is not a float - with self.assertRaises(ValueError) as e: - Rtichoke( - probs=np.array([0.1, 0.5, 0.9]), reals=np.array([0, 1, 0]), by="0.25" - ) - self.assertEqual( - str(e.exception), "Argument `by` must be a float, 0 > `by` <= 0.5" - ) - - def test_by_out_of_range(self): - """Test check_by: `by` argument out of range""" - # Test that an exception is raised when the input is out of range - with self.assertRaises(ValueError) as e: - Rtichoke( - probs=np.array([0.1, 0.5, 0.9]), reals=np.array([0, 1, 0]), by="0.6" - ) - self.assertEqual( - str(e.exception), "Argument `by` must be a float, 0 > `by` <= 0.5" - ) - - -class TestValidateInputs(unittest.TestCase): - """UnitTest class for end-to-end validation tests""" - - def test_validate_inputs(self): - """End-to-end test with valid inputs""" - r = Rtichoke(probs=np.array([0.1, 0.5, 0.9]), reals=np.array([0, 1, 0]), by=0.2) - - # test for valid input - probs = np.array([0.1, 0.5, 0.9]) - reals = np.array([0, 1, 0]) - self.assertIsNone(r.validate_inputs(probs, reals)) - - # test for un-equal sized arrays - with self.assertRaises(ValueError) as e: - r.validate_inputs(np.array([0.1, 0.5]), np.array([0, 1, 0])) - self.assertEqual( - str(e.exception), - "Probs and reals shapes are inconsistent ((2,) and (3,))", - ) - - # test for invalid probs - with self.assertRaises(ValueError) as e: - r.validate_inputs(np.array([-0.1, 0.5, 1.1]), np.array([0, 1, 0])) - self.assertEqual(str(e.exception), "Probs must be within [0, 1]") - - # test for invalid reals - with self.assertRaises(ValueError) as e: - r.validate_inputs(np.array([0.1, 0.5, 0.9]), np.array([0, 1, 2])) - self.assertEqual(str(e.exception), "Reals must include only 0's and 1's") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py deleted file mode 100644 index 83b686c..0000000 --- a/tests/test_rtichoke.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -A module for tests -""" - -# from rtichoke import rtichoke - -def test_addition(): - expected = 4 - actual = 2 + 2 - assert actual == expected \ No newline at end of file