From 14defe63b6e912e0d3f604587f93356aabbeaa7c Mon Sep 17 00:00:00 2001 From: SIKAI ZHANG <34108862+MatthewSZhang@users.noreply.github.com> Date: Tue, 15 Oct 2024 10:27:58 +0800 Subject: [PATCH] DOC fix wrong pick in plot_redundancy --- README.rst | 2 +- examples/plot_redundancy.py | 15 +++++++-------- fastcan/_utils.py | 11 +++++++++++ pixi.lock | 4 ++++ 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 67ccd4a..a0c952a 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ FastCan is a feature selection method, which has following advantages: #. Skip redundant features. -#. Evalaute relative usefulness of features. +#. Evaluate relative usefulness of features. Check `Home Page `_ for more information. diff --git a/examples/plot_redundancy.py b/examples/plot_redundancy.py index deebde3..3d2e003 100644 --- a/examples/plot_redundancy.py +++ b/examples/plot_redundancy.py @@ -1,7 +1,7 @@ """ -=================================================== -Feature selection performance on redundant features -=================================================== +================================= +Performance on redundant features +================================= .. currentmodule:: fastcan @@ -109,8 +109,7 @@ def get_n_missed( n_missed_dep = len( np.setdiff1d(dep_info_ids+redundant_ids, selected_ids) )-n_redundant - if n_missed_dep < 0: - n_missed_dep = 0 + n_missed_dep = max(n_missed_dep, 0) return n_missed_indep+n_missed_dep # %% @@ -160,7 +159,7 @@ def get_n_missed( N_REPEATED = 10 selector_dict = { - "fastcan": FastCan(N_SELECTED, verbose=0), + "fastcan": FastCan(N_SELECTED, tol=1e-7, verbose=0), "skb_reg": SelectKBest(f_regression, k=N_SELECTED), "skb_mir": SelectKBest(mutual_info_regression, k=N_SELECTED), "sfm_lsvr": SelectFromModel(lsvr, max_features=N_SELECTED, threshold=-np.inf), @@ -179,7 +178,7 @@ def get_n_missed( n_missed = np.zeros((N_REPEATED, N_SELECTORS), dtype=int) for i in range(N_REPEATED): - X, y = make_redundant( + data, target = make_redundant( n_samples=N_SAMPLES, n_features=N_FEATURES, dep_info_ids=DEP_INFO_IDS, @@ -188,7 +187,7 @@ def get_n_missed( random_seed=i, ) for j, selector in enumerate(selector_dict.values()): - result_ids = selector.fit(X, y).get_support(indices=True) + result_ids = selector.fit(data, target).get_support(indices=True) n_missed[i, j] = get_n_missed( dep_info_ids=DEP_INFO_IDS, indep_info_ids=INDEP_INFO_IDS, diff --git a/fastcan/_utils.py b/fastcan/_utils.py index 27b9174..c4c4b41 100644 --- a/fastcan/_utils.py +++ b/fastcan/_utils.py @@ -80,6 +80,17 @@ def ols(X, y, t=1): scores : ndarray of shape (n_features_to_select,), dtype=float The scores of selected features. The order of the scores is corresponding to the feature selection process. + + Examples + -------- + >>> from fastcan import ols + >>> X = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 0]] + >>> y = [1, 0, 1, 0] + >>> indices, scores = ols(X, y, 2) + >>> indices + array([0, 2]) + >>> scores + array([0.5, 0.5]) """ X, y = check_X_y(X, y, dtype=float, ensure_2d=True) n_features = X.shape[1] diff --git a/pixi.lock b/pixi.lock index a81977b..5975df7 100644 --- a/pixi.lock +++ b/pixi.lock @@ -1439,6 +1439,7 @@ packages: - python_abi 3.12.* *_cp312 - tomli license: Apache-2.0 + license_family: APACHE purls: - pkg:pypi/coverage?source=hash-mapping size: 361418 @@ -1457,6 +1458,7 @@ packages: - python_abi 3.12.* *_cp312 - tomli license: Apache-2.0 + license_family: APACHE purls: - pkg:pypi/coverage?source=hash-mapping size: 361331 @@ -1477,6 +1479,7 @@ packages: - vc >=14.2,<15 - vc14_runtime >=14.29.30139 license: Apache-2.0 + license_family: APACHE purls: - pkg:pypi/coverage?source=hash-mapping size: 389654 @@ -1496,6 +1499,7 @@ packages: - python_abi 3.12.* *_cp312 - tomli license: Apache-2.0 + license_family: APACHE purls: - pkg:pypi/coverage?source=hash-mapping size: 362490