updated notebooks

socialfoundations · Aug 28, 2024 · 6f8e709
1 parent b377c43
commit 6f8e709
Show file tree

Hide file tree

Showing 7 changed files with 585 additions and 221 deletions.
diff --git a/folktexts/classifier/base.py b/folktexts/classifier/base.py
@@ -22,7 +22,7 @@
 
 from .._utils import hash_dict, hash_function
 
-DEFAULT_CONTEXT_SIZE = 500
+DEFAULT_CONTEXT_SIZE = 600
 DEFAULT_BATCH_SIZE = 16
 
 SCORE_COL_NAME = "risk_score"

diff --git a/folktexts/cli/eval_feature_importance.py b/folktexts/cli/eval_feature_importance.py
@@ -21,7 +21,7 @@
 
 DEFAULT_TASK_NAME = "ACSIncome"
 
-DEFAULT_CONTEXT_SIZE = 500
+DEFAULT_CONTEXT_SIZE = 600
 DEFAULT_BATCH_SIZE = 30
 DEFAULT_SEED = 42
 

diff --git a/folktexts/cli/run_acs_benchmark.py b/folktexts/cli/run_acs_benchmark.py
@@ -10,7 +10,7 @@
 DEFAULT_ACS_TASK = "ACSIncome"
 
 DEFAULT_BATCH_SIZE = 30
-DEFAULT_CONTEXT_SIZE = 500
+DEFAULT_CONTEXT_SIZE = 600
 DEFAULT_SEED = 42
 
 

diff --git a/folktexts/plotting.py b/folktexts/plotting.py
@@ -238,7 +238,7 @@ def show_or_save(fig, fig_name: str):
 
         # If the group is too small of a fraction, skip (curve will be too erratic)
         if len(group_indices) / len(sensitive_attribute) < group_size_threshold:
-            logging.warning(f"Skipping group {group_value_map(s_value)} plot as it's too small.")
+            logging.info(f"Skipping group {group_value_map(s_value)} plot as it's too small.")
             continue
 
         # Plot global calibration curve
@@ -257,29 +257,4 @@ def show_or_save(fig, fig_name: str):
     plt.title("Calibration curve per sub-group" + model_str)
     show_or_save(fig, "calibration_curve_per_subgroup")
 
-    # ###
-    # Plot scores distribution per group
-    # ###
-    # TODO: make a decent score-distribution plot... # TODO: try score CDFs!
-    # hist_bin_edges = np.histogram_bin_edges(y_pred_scores, bins=10)
-    # for idx, s_value in enumerate(np.unique(sensitive_attribute)):
-    #     group_indices = np.argwhere(sensitive_attribute == s_value).flatten()
-    #     group_y_pred_scores = y_pred_scores[group_indices]
-    #     is_first_group = (idx == 0)
-    #     if is_first_group:
-    #         fig, ax = plt.subplots()
-    #     sns.histplot(
-    #         group_y_pred_scores,
-    #         bins=hist_bin_edges,
-    #         stat="density",
-    #         kde=False,
-    #         color=group_colors[idx],
-    #         label=group_value_map(s_value),
-    #         ax=ax,
-    #     )
-
-    # plt.legend()
-    # plt.title("Score distribution per sub-group" + model_str)
-    # results["score_distribution_per_subgroup_path"] = save_fig(fig, "score_distribution_per_subgroup", imgs_dir)
-
     return results
diff --git a/notebooks/detailed-example.ipynb b/notebooks/detailed-example.ipynb
diff --git a/notebooks/minimal-example.ipynb b/notebooks/minimal-example.ipynb
diff --git a/notebooks/minimal-example_web-API-model.ipynb b/notebooks/minimal-example_web-API-model.ipynb