Skip to content

Commit

Permalink
control all randomness
Browse files Browse the repository at this point in the history
  • Loading branch information
cyianor committed May 21, 2024
1 parent cbded1d commit 4260e11
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 92 deletions.
134 changes: 108 additions & 26 deletions README.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 126,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -55,7 +55,7 @@
"xs_sim = solrcmf.simulate(\n",
" viewdims={0: 100, 1: 50, 2: 50},\n",
" factor_scales={\n",
" (0, 1): [7.0, 4.5, 3.9, 0.0, 0.0],\n",
" (0, 1): [7.0, 5.1, 4.6, 0.0, 0.0],\n",
" (0, 2): [8.3, 0.0, 0.0, 5.5, 0.0],\n",
" (1, 2, 0): [6.3, 0.0, 4.7, 0.0, 5.1],\n",
" (1, 2, 1): [0.0, 8.6, 4.9, 0.0, 0.0],\n",
Expand All @@ -81,7 +81,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -107,7 +107,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -149,11 +149,11 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
"est.fit(xs_scaled, vs=est_init.vs_, ds=est_init.ds_, us=est_init.vs_)"
"est.fit(xs_scaled, vs=est_init.vs_, ds=est_init.ds_, us=est_init.vs_);"
]
},
{
Expand All @@ -167,9 +167,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 130,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(0, 1) : [-0.00 2.34 -0.00 -0.00 -4.17 0.00 0.00 0.00 0.00 6.52]\n",
"(0, 2) : [ 0.00 -0.00 3.44 0.00 0.00 -0.00 -0.00 0.00 -0.00 -7.53]\n",
"(1, 2, 0) : [ 0.00 -3.21 -0.00 0.00 0.00 0.00 0.00 -0.00 -2.88 -5.67]\n",
"(1, 2, 1) : [ 0.00 -3.74 0.05 -0.00 -8.22 -0.00 0.00 -0.00 0.00 0.00]\n"
]
}
],
"source": [
"for k, d in est.ds_.items():\n",
" rescaled_d = d * np.sqrt((xs_centered[k] ** 2).sum())\n",
Expand Down Expand Up @@ -199,20 +210,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 131,
"metadata": {},
"outputs": [],
"source": [
"# Lists of structure and factor penalties are supplied containing the\n",
"# parameter combinations to be tested. Lists need to be of the same length\n",
"# or one needs to be a scalar.\n",
"# - `cv` number of folds\n",
"# - `cv` number of folds as an integer or an object of\n",
"# class `lscmf.ElementwiseFolds`. The latter is also used internally if only\n",
"# an integer is provided, however, it allows specification of a random\n",
"# number generator and whether or not inputs should be shuffled\n",
"# before splitting.\n",
"est_cv = solrcmf.SolrCMFCV(\n",
" max_rank=10,\n",
" structure_penalty=np.exp(rng.uniform(np.log(5e-2), np.log(1.0), 100)),\n",
" factor_penalty=np.exp(rng.uniform(np.log(5e-2), np.log(1.0), 100)),\n",
" mu=10,\n",
" cv=10,\n",
" cv=solrcmf.ElementwiseFolds(10, rng=rng),\n",
" init=\"custom\",\n",
" max_iter=100000,\n",
" n_jobs=4,\n",
Expand All @@ -228,14 +243,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 132,
"metadata": {},
"outputs": [],
"source": [
"# Initial values are supplied as lists. If length 1 then they are reused.\n",
"# If same length as hyperparameters then different initial values can be used\n",
"# for each pair of hyperparameters.\n",
"est_cv.fit(xs_scaled, vs=[est_init.vs_], ds=[est_init.ds_], us=[est_init.vs_])"
"est_cv.fit(xs_scaled, vs=[est_init.vs_], ds=[est_init.ds_], us=[est_init.vs_]);"
]
},
{
Expand All @@ -247,9 +262,43 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 133,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"structure_penalty 0.114250\n",
"max_rank 10.000000\n",
"factor_penalty 0.058822\n",
"objective_value_penalized 2.014111\n",
"mean_elapsed_process_time_penalized 7.391883\n",
"std_elapsed_process_time_penalized 0.000000\n",
"est_max_rank 5.000000\n",
"structural_zeros 30.000000\n",
"factor_zeros 1748.000000\n",
"neg_mean_squared_error_fold0 -0.000191\n",
"neg_mean_squared_error_fold1 -0.000189\n",
"neg_mean_squared_error_fold2 -0.000196\n",
"neg_mean_squared_error_fold3 -0.000169\n",
"neg_mean_squared_error_fold4 -0.000199\n",
"neg_mean_squared_error_fold5 -0.000188\n",
"neg_mean_squared_error_fold6 -0.000184\n",
"neg_mean_squared_error_fold7 -0.000185\n",
"neg_mean_squared_error_fold8 -0.000191\n",
"neg_mean_squared_error_fold9 -0.000181\n",
"mean_elapsed_process_time_fixed 1.239766\n",
"std_elapsed_process_time_fixed 0.132469\n",
"mean_neg_mean_squared_error -0.000187\n",
"std_neg_mean_squared_error 0.000008\n",
"Name: 76, dtype: float64"
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
Expand All @@ -259,9 +308,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 134,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(0, 1) : [ 3.92 -0.00 -5.24 0.00 7.43]\n",
"(0, 2) : [-0.00 5.06 0.00 -0.00 -8.45]\n",
"(1, 2, 0) : [-4.26 -0.00 0.00 -4.17 -6.58]\n",
"(1, 2, 1) : [-4.84 0.00 -9.23 0.00 0.00]\n"
]
}
],
"source": [
"for k, d in est_cv.best_estimator_.ds_.items():\n",
" rescaled_d = d * np.sqrt((xs_centered[k] ** 2).sum())\n",
Expand All @@ -277,16 +337,27 @@
"source": [
"Due to the small size of the data sources and signal-to-noise ratio of 0.5, it is not possible to recover singular values perfectly. However, thanks to unpenalized re-estimation, the strong shrinkage seen in the manual solution above is not present here.\n",
"\n",
"The factor estimates are in `est_cv.best_estimator_.vs_`, however, sparse factors can be found in `est_cv.best_estimator_.us_`. In this particular run, factor 0 of view 0 in the groundtruth corresponds to factor 0 in view 0 of the estimate. Note that in general factor order is arbitrary."
"The factor estimates are in `est_cv.best_estimator_.vs_`, however, sparse factors can be found in `est_cv.best_estimator_.us_`. In this particular run, factor 1 of view 0 in the groundtruth corresponds to factor 5 in view 0 of the estimate. Note that in general factor order is arbitrary."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 138,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"-0.9878174758052286"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.sum(xs_sim[\"vs\"][0][:, 0] * est_cv.best_estimator_.us_[0][:, 0])"
"np.sum(xs_sim[\"vs\"][0][:, 0] * est_cv.best_estimator_.us_[0][:, 4])"
]
},
{
Expand All @@ -298,7 +369,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 136,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -316,16 +387,27 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 139,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"(0.6410256410256411, 0.0)"
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(\n",
" true_positive_rate(\n",
" xs_sim[\"vs\"][0][:, 0], est_cv.best_estimator_.us_[0][:, 0]\n",
" xs_sim[\"vs\"][0][:, 0], est_cv.best_estimator_.us_[0][:, 4]\n",
" ),\n",
" false_positive_rate(\n",
" xs_sim[\"vs\"][0][:, 0], est_cv.best_estimator_.us_[0][:, 0]\n",
" xs_sim[\"vs\"][0][:, 0], est_cv.best_estimator_.us_[0][:, 4]\n",
" ),\n",
")"
]
Expand Down
Loading

0 comments on commit 4260e11

Please sign in to comment.