From 4ba97f007c9c99c8101f780922b4dbb7a2b02aa7 Mon Sep 17 00:00:00 2001 From: "Egor.Kraev" Date: Tue, 14 May 2024 14:06:49 +0100 Subject: [PATCH] Update docstrings and arg descriptions in example notebook, to better describe solver arg --- notebooks/Finding interesting segments.ipynb | 21 +++++++++----------- wise_pizza/explain.py | 11 ++++++++-- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/notebooks/Finding interesting segments.ipynb b/notebooks/Finding interesting segments.ipynb index ea969ee..b8f8b9d 100644 --- a/notebooks/Finding interesting segments.ipynb +++ b/notebooks/Finding interesting segments.ipynb @@ -579,9 +579,8 @@ "- `min_depth`: Minimum number of dimension to constrain in segment definition\n", "- `max_depth`: Maximum number of dimension to constrain in segment definition\n", "- `solver`: If this equals to \"lp\" uses the LP solver, else uses the (recommended) Lasso solver\n", - " - `\"lasso\"`: Lasso-based finder of unusual segments\n", - " - `\"lp\"`: LP-based finder of unusual segments\n", - " - `\"tree\"`: tree-based finder of unusual segments\n", + " - `\"lasso\"`: Use to find most unusual segments, which may overlap and most likely won't contain the whole dataset\n", + " - `\"tree\"`: Use to divide up the whole dataset into non-overlapping segments that are as homogenous as possible\n", "- `cluster_values`: In addition to single-value slices, consider slices that consist of a\n", " group of segments from the same dimension with similar naive averages\n", " - `True`: to use cluster values, you can them using `sf.relevant_cluster_names`\n", @@ -784,9 +783,8 @@ "- `min_depth`: Minimum number of dimension to constrain in segment definition\n", "- `max_depth`: Maximum number of dimension to constrain in segment definition\n", "- `solver`: If this equals to \"lp\" uses the LP solver, else uses the (recommended) Lasso solver \n", - " - `\"lasso\"`: Lasso-based finder of unusual segments\n", - " - `\"lp\"`: LP-based finder of 
unusual segments\n", - " - `\"tree\"`: tree-based finder of unusual segments\n", + " - `\"lasso\"`: Use to find most unusual segments, which may overlap and most likely won't contain the whole dataset\n", + " - `\"tree\"`: Use to divide up the whole dataset into non-overlapping segments that are as homogenous as possible\n", "- `how`:\n", " - `\"totals\"`: to only decompose segment totals (ignoring size vs average contribution)\n", " - `\"split_fits\"`: to separately decompose contribution of size changes and average changes\n", @@ -1156,9 +1154,8 @@ "- `min_depth`: Minimum number of dimension to constrain in segment definition\n", "- `max_depth`: Maximum number of dimension to constrain in segment definition\n", "- `solver`: If this equals to \"lp\" uses the LP solver, else uses the (recommended) Lasso solver \n", - " - `\"lasso\"`: Lasso-based finder of unusual segments\n", - " - `\"lp\"`: LP-based finder of unusual segments\n", - " - `\"tree\"`: tree-based finder of unusual segments\n", + " - `\"lasso\"`: Use to find most unusual segments, which may overlap and most likely won't contain the whole dataset\n", + " - `\"tree\"`: Use to divide up the whole dataset into non-overlapping segments that are as homogenous as possible\n", "- `how`:\n", " - `\"totals\"`: to only decompose segment totals (ignoring size vs average contribution)\n", " - `\"split_fits\"`: to separately decompose contribution of size changes and average changes\n", @@ -1418,9 +1415,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python [conda env:pizza3.11]", "language": "python", - "name": "python3" + "name": "conda-env-pizza3.11-py" }, "language_info": { "codemirror_mode": { @@ -1432,7 +1429,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/wise_pizza/explain.py b/wise_pizza/explain.py index ab1066a..c593483 100644 --- 
a/wise_pizza/explain.py +++ b/wise_pizza/explain.py @@ -48,7 +48,9 @@ def explain_changes_in_average( @param max_segments: Maximum number of segments to find, defaults to min_segments @param min_depth: Minimum number of dimension to constrain in segment definition @param max_depth: Maximum number of dimension to constrain in segment definition - @param solver: If this equals to "lp" uses the LP solver, else uses the (recommended) Lasso solver + @param solver: "lasso" for most unusual, possibly overlapping segments; + "tree" to divide the whole dataset into non-overlapping segments, + as homogenous as possible. @param how: "totals" to only decompose segment totals (ignoring size vs average contribution) "split_fits" to separately decompose contribution of size changes and average changes "extra_dim" to treat size vs average change contribution as an additional dimension @@ -145,7 +147,9 @@ def explain_changes_in_totals( @param max_segments: Maximum number of segments to find, defaults to min_segments @param min_depth: Minimum number of dimension to constrain in segment definition @param max_depth: Maximum number of dimension to constrain in segment definition - @param solver: If this equals to "lp" uses the LP solver, else uses the (recommended) Lasso solver + @param solver: "lasso" for most unusual, possibly overlapping segments; + "tree" to divide the whole dataset into non-overlapping segments, + as homogenous as possible. 
@param how: "totals" to only decompose segment totals (ignoring size vs average contribution) "split_fits" to separately decompose contribution of size changes and average changes "extra_dim" to treat size vs average change contribution as an additional dimension @@ -290,6 +294,8 @@ def explain_levels( @param max_segments: Maximum number of segments to find, defaults to min_segments @param min_depth: Minimum number of dimension to constrain in segment definition @param max_depth: Maximum number of dimension to constrain in segment definition + @param solver: "lasso" for most unusual, possibly overlapping segments; + "tree" to divide the whole dataset into non-overlapping segments, + as homogenous as possible. - @param solver: If this equals to "lp" uses the LP solver, else uses the (recommended) Lasso solver @param verbose: If set to a truish value, lots of debug info is printed to console @param force_add_up: Force the contributions of chosen segments to add up to zero