Merge pull request #12 from MatthewSZhang/docs

DOC fix wrong pick in plot_redundancy
scikit-learn-contrib · Oct 15, 2024 · 114b0dc
2 parents 839b4e0 + 14defe6
commit 114b0dc
Show file tree

Hide file tree

Showing 4 changed files with 23 additions and 9 deletions.
diff --git a/README.rst b/README.rst
@@ -39,7 +39,7 @@ FastCan is a feature selection method, which has following advantages:
 
 #. Skip redundant features.
 
-#. Evalaute relative usefulness of features.
+#. Evaluate relative usefulness of features.
 
 Check `Home Page <https://fastcan.readthedocs.io/en/latest/?badge=latest>`_ for more information.
 

diff --git a/examples/plot_redundancy.py b/examples/plot_redundancy.py
@@ -1,7 +1,7 @@
 """
-===================================================
-Feature selection performance on redundant features
-===================================================
+=================================
+Performance on redundant features
+=================================
 
 .. currentmodule:: fastcan
 
@@ -109,8 +109,7 @@ def get_n_missed(
     n_missed_dep = len(
         np.setdiff1d(dep_info_ids+redundant_ids, selected_ids)
     )-n_redundant
-    if n_missed_dep < 0:
-        n_missed_dep = 0
+    n_missed_dep = max(n_missed_dep, 0)
     return n_missed_indep+n_missed_dep
 
 # %%
@@ -160,7 +159,7 @@ def get_n_missed(
 N_REPEATED = 10
 
 selector_dict = {
-    "fastcan": FastCan(N_SELECTED, verbose=0),
+    "fastcan": FastCan(N_SELECTED, tol=1e-7, verbose=0),
     "skb_reg": SelectKBest(f_regression, k=N_SELECTED),
     "skb_mir": SelectKBest(mutual_info_regression, k=N_SELECTED),
     "sfm_lsvr": SelectFromModel(lsvr, max_features=N_SELECTED, threshold=-np.inf),
@@ -179,7 +178,7 @@ def get_n_missed(
 n_missed = np.zeros((N_REPEATED, N_SELECTORS), dtype=int)
 
 for i in range(N_REPEATED):
-    X, y = make_redundant(
+    data, target = make_redundant(
         n_samples=N_SAMPLES,
         n_features=N_FEATURES,
         dep_info_ids=DEP_INFO_IDS,
@@ -188,7 +187,7 @@ def get_n_missed(
         random_seed=i,
     )
     for j, selector in enumerate(selector_dict.values()):
-        result_ids = selector.fit(X, y).get_support(indices=True)
+        result_ids = selector.fit(data, target).get_support(indices=True)
         n_missed[i, j] = get_n_missed(
             dep_info_ids=DEP_INFO_IDS,
             indep_info_ids=INDEP_INFO_IDS,

diff --git a/fastcan/_utils.py b/fastcan/_utils.py
@@ -80,6 +80,17 @@ def ols(X, y, t=1):
     scores : ndarray of shape (n_features_to_select,), dtype=float
         The scores of selected features. The order of
         the scores is corresponding to the feature selection process.
+
+    Examples
+    --------
+    >>> from fastcan import ols
+    >>> X = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 0]]
+    >>> y = [1, 0, 1, 0]
+    >>> indices, scores = ols(X, y, 2)
+    >>> indices
+    array([0, 2])
+    >>> scores
+    array([0.5, 0.5])
     """
     X, y = check_X_y(X, y, dtype=float, ensure_2d=True)
     n_features = X.shape[1]

diff --git a/pixi.lock b/pixi.lock
@@ -1439,6 +1439,7 @@ packages:
   - python_abi 3.12.* *_cp312
   - tomli
   license: Apache-2.0
+  license_family: APACHE
   purls:
   - pkg:pypi/coverage?source=hash-mapping
   size: 361418
@@ -1457,6 +1458,7 @@ packages:
   - python_abi 3.12.* *_cp312
   - tomli
   license: Apache-2.0
+  license_family: APACHE
   purls:
   - pkg:pypi/coverage?source=hash-mapping
   size: 361331
@@ -1477,6 +1479,7 @@ packages:
   - vc >=14.2,<15
   - vc14_runtime >=14.29.30139
   license: Apache-2.0
+  license_family: APACHE
   purls:
   - pkg:pypi/coverage?source=hash-mapping
   size: 389654
@@ -1496,6 +1499,7 @@ packages:
   - python_abi 3.12.* *_cp312
   - tomli
   license: Apache-2.0
+  license_family: APACHE
   purls:
   - pkg:pypi/coverage?source=hash-mapping
   size: 362490