Skip to content

Commit

Permalink
Merge pull request #12 from MatthewSZhang/docs
Browse files Browse the repository at this point in the history
DOC fix wrong pick in plot_redundancy
  • Loading branch information
MatthewSZhang authored Oct 15, 2024
2 parents 839b4e0 + 14defe6 commit 114b0dc
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 9 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ FastCan is a feature selection method, which has the following advantages:

#. Skip redundant features.

#. Evalaute relative usefulness of features.
#. Evaluate relative usefulness of features.

Check `Home Page <https://fastcan.readthedocs.io/en/latest/?badge=latest>`_ for more information.

Expand Down
15 changes: 7 additions & 8 deletions examples/plot_redundancy.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
===================================================
Feature selection performance on redundant features
===================================================
=================================
Performance on redundant features
=================================
.. currentmodule:: fastcan
Expand Down Expand Up @@ -109,8 +109,7 @@ def get_n_missed(
n_missed_dep = len(
np.setdiff1d(dep_info_ids+redundant_ids, selected_ids)
)-n_redundant
if n_missed_dep < 0:
n_missed_dep = 0
n_missed_dep = max(n_missed_dep, 0)
return n_missed_indep+n_missed_dep

# %%
Expand Down Expand Up @@ -160,7 +159,7 @@ def get_n_missed(
N_REPEATED = 10

selector_dict = {
"fastcan": FastCan(N_SELECTED, verbose=0),
"fastcan": FastCan(N_SELECTED, tol=1e-7, verbose=0),
"skb_reg": SelectKBest(f_regression, k=N_SELECTED),
"skb_mir": SelectKBest(mutual_info_regression, k=N_SELECTED),
"sfm_lsvr": SelectFromModel(lsvr, max_features=N_SELECTED, threshold=-np.inf),
Expand All @@ -179,7 +178,7 @@ def get_n_missed(
n_missed = np.zeros((N_REPEATED, N_SELECTORS), dtype=int)

for i in range(N_REPEATED):
X, y = make_redundant(
data, target = make_redundant(
n_samples=N_SAMPLES,
n_features=N_FEATURES,
dep_info_ids=DEP_INFO_IDS,
Expand All @@ -188,7 +187,7 @@ def get_n_missed(
random_seed=i,
)
for j, selector in enumerate(selector_dict.values()):
result_ids = selector.fit(X, y).get_support(indices=True)
result_ids = selector.fit(data, target).get_support(indices=True)
n_missed[i, j] = get_n_missed(
dep_info_ids=DEP_INFO_IDS,
indep_info_ids=INDEP_INFO_IDS,
Expand Down
11 changes: 11 additions & 0 deletions fastcan/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,17 @@ def ols(X, y, t=1):
scores : ndarray of shape (n_features_to_select,), dtype=float
The scores of selected features. The order of
the scores corresponds to the feature selection process.
Examples
--------
>>> from fastcan import ols
>>> X = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 0]]
>>> y = [1, 0, 1, 0]
>>> indices, scores = ols(X, y, 2)
>>> indices
array([0, 2])
>>> scores
array([0.5, 0.5])
"""
X, y = check_X_y(X, y, dtype=float, ensure_2d=True)
n_features = X.shape[1]
Expand Down
4 changes: 4 additions & 0 deletions pixi.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1439,6 +1439,7 @@ packages:
- python_abi 3.12.* *_cp312
- tomli
license: Apache-2.0
license_family: APACHE
purls:
- pkg:pypi/coverage?source=hash-mapping
size: 361418
Expand All @@ -1457,6 +1458,7 @@ packages:
- python_abi 3.12.* *_cp312
- tomli
license: Apache-2.0
license_family: APACHE
purls:
- pkg:pypi/coverage?source=hash-mapping
size: 361331
Expand All @@ -1477,6 +1479,7 @@ packages:
- vc >=14.2,<15
- vc14_runtime >=14.29.30139
license: Apache-2.0
license_family: APACHE
purls:
- pkg:pypi/coverage?source=hash-mapping
size: 389654
Expand All @@ -1496,6 +1499,7 @@ packages:
- python_abi 3.12.* *_cp312
- tomli
license: Apache-2.0
license_family: APACHE
purls:
- pkg:pypi/coverage?source=hash-mapping
size: 362490
Expand Down

0 comments on commit 114b0dc

Please sign in to comment.