From cbded1dfebb104c1f7f22dacc79909cbd9649749 Mon Sep 17 00:00:00 2001 From: Felix Held Date: Tue, 21 May 2024 18:35:35 +0200 Subject: [PATCH] fix formatting --- README.ipynb | 4 + README.md | 926 ++++----------------------------------------------- 2 files changed, 60 insertions(+), 870 deletions(-) diff --git a/README.ipynb b/README.ipynb index 6d4ef54..81547c0 100644 --- a/README.ipynb +++ b/README.ipynb @@ -22,6 +22,10 @@ "metadata": {}, "outputs": [], "source": [ + "from sklearn import set_config\n", + "\n", + "set_config(display=\"text\") # show text representation of sklearn estimators\n", + "\n", "import solrcmf\n", "import numpy as np\n", "from numpy.random import default_rng\n", diff --git a/README.md b/README.md index d49ead3..94322ce 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,10 @@ A simple usage example is shown below: ```python +from sklearn import set_config + +set_config(display="text") # show text representation of sklearn estimators + import solrcmf import numpy as np from numpy.random import default_rng @@ -57,8 +61,7 @@ xs_sim = solrcmf.simulate( # - "vs", the simulated orthogonal factors ``` -Estimation via multi-block ADMM is encapsulated in the class `SolrCMF` which has -a convenient scikit-learn interface. +Estimation via multi-block ADMM is encapsulated in the class `SolrCMF` which has a convenient scikit-learn interface. ```python @@ -123,419 +126,14 @@ est.fit(xs_scaled, vs=est_init.vs_, ds=est_init.ds_, us=est_init.vs_) -
SolrCMF(factor_penalty=0.08, factor_pruning=False, init='custom',
-        max_iter=100000, max_rank=10, mu=10, structure_penalty=0.05)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
+ SolrCMF(factor_penalty=0.08, factor_pruning=False, init='custom', + max_iter=100000, max_rank=10, mu=10, structure_penalty=0.05) Estimates for $D_{ij}$ are then in `est.ds_` and estimates for $V_i$ in `est.vs_`. -Scale back to original scale +Scale back to original scale. ```python @@ -597,439 +195,23 @@ est_cv.fit(xs_scaled, vs=[est_init.vs_], ds=[est_init.ds_], us=[est_init.vs_]) -
SolrCMFCV(factor_penalty=array([0.36792423, 0.06941587, 0.10752516, 0.43376266, 0.53883355,
-       0.95854522, 0.10676321, 0.62188201, 0.08373001, 0.07400256,
-       0.44430964, 0.19013555, 0.53937187, 0.11657228, 0.09469818,
-       0.25880558, 0.07063052, 0.11524739, 0.08227149, 0.13042509,
-       0.49534386, 0.07178925, 0.3126294 , 0.28824926, 0.25066523,
-       0.2132999 , 0.36531926, 0.64150673, 0.08124273, 0...
-       0.71015324, 0.1375872 , 0.10718306, 0.73360059, 0.0774336 ,
-       0.05972806, 0.12817684, 0.48768923, 0.40007808, 0.96196336,
-       0.14680267, 0.11424985, 0.15524923, 0.52084544, 0.09501248,
-       0.85510326, 0.23217319, 0.52223399, 0.59602222, 0.2098567 ,
-       0.46080418, 0.14908991, 0.56755986, 0.59005505, 0.27265958,
-       0.09611405, 0.91465952, 0.85313787, 0.32016594, 0.95285913,
-       0.22548781, 0.15398784, 0.19865442, 0.05737153, 0.25905621]))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
- - - -CV results can be found in the attribute `est_cv.cv_results_` and can be easily converted to a Pandas `DataFrame`. The best result corresponds to the row with index `est_cv.best_index_`. + SolrCMFCV(factor_penalty=array([0.36792423, 0.06941587, 0.10752516, 0.43376266, 0.53883355, + 0.95854522, 0.10676321, 0.62188201, 0.08373001, 0.07400256, + 0.44430964, 0.19013555, 0.53937187, 0.11657228, 0.09469818, + 0.25880558, 0.07063052, 0.11524739, 0.08227149, 0.13042509, + 0.49534386, 0.07178925, 0.3126294 , 0.28824926, 0.25066523, + 0.2132999 , 0.36531926, 0.64150673, 0.08124273, 0... + 0.71015324, 0.1375872 , 0.10718306, 0.73360059, 0.0774336 , + 0.05972806, 0.12817684, 0.48768923, 0.40007808, 0.96196336, + 0.14680267, 0.11424985, 0.15524923, 0.52084544, 0.09501248, + 0.85510326, 0.23217319, 0.52223399, 0.59602222, 0.2098567 , + 0.46080418, 0.14908991, 0.56755986, 0.59005505, 0.27265958, + 0.09611405, 0.91465952, 0.85313787, 0.32016594, 0.95285913, + 0.22548781, 0.15398784, 0.19865442, 0.05737153, 0.25905621])) + + + +CV results can be found in the attribute `est_cv.cv_results_` and can be easily converted to a Pandas `DataFrame`. The best result corresponds to the row with index `est_cv.best_index_`. ```python @@ -1042,30 +224,30 @@ cv_res.loc[est_cv.best_index_, :] - structure_penalty 0.114250 + structure_penalty 0.062040 max_rank 10.000000 - factor_penalty 0.058822 - objective_value_penalized 1.999925 - mean_elapsed_process_time_penalized 9.922670 + factor_penalty 0.069416 + objective_value_penalized 1.934370 + mean_elapsed_process_time_penalized 6.977537 std_elapsed_process_time_penalized 0.000000 est_max_rank 5.000000 - structural_zeros 31.000000 - factor_zeros 1764.000000 - neg_mean_squared_error_fold0 -0.000172 - neg_mean_squared_error_fold1 -0.000192 - neg_mean_squared_error_fold2 -0.000196 + structural_zeros 30.000000 + factor_zeros 1746.000000 + neg_mean_squared_error_fold0 -0.000193 + neg_mean_squared_error_fold1 -0.000179 + neg_mean_squared_error_fold2 -0.000181 neg_mean_squared_error_fold3 -0.000185 - neg_mean_squared_error_fold4 -0.000182 - neg_mean_squared_error_fold5 -0.000190 - neg_mean_squared_error_fold6 -0.000201 - neg_mean_squared_error_fold7 -0.000202 - neg_mean_squared_error_fold8 -0.000200 - neg_mean_squared_error_fold9 -0.000181 - mean_elapsed_process_time_fixed 1.284826 - std_elapsed_process_time_fixed 0.132348 - mean_neg_mean_squared_error -0.000190 - std_neg_mean_squared_error 0.000009 - Name: 76, dtype: float64 + neg_mean_squared_error_fold4 -0.000189 + neg_mean_squared_error_fold5 -0.000184 + neg_mean_squared_error_fold6 -0.000190 + neg_mean_squared_error_fold7 -0.000184 + neg_mean_squared_error_fold8 -0.000182 + neg_mean_squared_error_fold9 -0.000189 + mean_elapsed_process_time_fixed 1.265778 + std_elapsed_process_time_fixed 0.094385 + mean_neg_mean_squared_error -0.000186 + std_neg_mean_squared_error 0.000004 + Name: 1, dtype: float64 @@ -1079,10 +261,10 @@ for k, d in est_cv.best_estimator_.ds_.items(): ) ``` - (0, 1) : [ 7.40 -0.00 4.59 0.00 -0.00] - (0, 2) : [-8.43 -0.00 -0.00 -5.00 0.00] - (1, 2, 0) : [-6.59 4.25 0.00 -0.00 -3.85] - (1, 2, 1) : [ 0.00 4.86 -9.22 0.00 0.00] + (0, 1) : [ 7.40 -3.27 4.59 0.00 -0.00] + (0, 2) : [-8.43 -0.00 -0.00 -5.15 0.00] + (1, 2, 0) : [-6.60 4.34 0.00 -0.00 -4.49] + (1, 2, 1) : [ 0.00 4.82 -9.21 0.00 0.00] Due to the small size of the data sources and signal-to-noise ratio of 0.5, it is not possible to recover singular values perfectly. However, thanks to unpenalized re-estimation, the strong shrinkage seen in the manual solution above is not present here. @@ -1097,7 +279,7 @@ np.sum(xs_sim["vs"][0][:, 0] * est_cv.best_estimator_.us_[0][:, 0]) - 0.9896296784962579 + 0.9886157937798741 @@ -1120,8 +302,12 @@ def false_positive_rate(estimate, truth): ```python ( - true_positive_rate(xs_sim["vs"][0][:, 0], est_cv.best_estimator_.us_[0][:, 0]), - false_positive_rate(xs_sim["vs"][0][:, 0], est_cv.best_estimator_.us_[0][:, 0]), + true_positive_rate( + xs_sim["vs"][0][:, 0], est_cv.best_estimator_.us_[0][:, 0] + ), + false_positive_rate( + xs_sim["vs"][0][:, 0], est_cv.best_estimator_.us_[0][:, 0] + ), ) ```