move normalised conditional entropy to utils
d-schindler committed Aug 18, 2023
1 parent 2658c63 commit c7e802b
Showing 2 changed files with 13 additions and 49 deletions.
50 changes: 1 addition & 49 deletions notebooks/02_baseline_network_analysis.ipynb
@@ -48,7 +48,7 @@
" sys.path.append(module_path)\n",
"\n",
"from network_analysis import remove_self_loops, visualse_largest_components\n",
"from utils import R2_score\n",
"from utils import R2_score, normalised_conditional_entropy\n",
"\n",
"root_figure = path+\"/figures/\"\n",
"root_map = path+'/data/geo_shapefiles//NUTS_Level_3__January_2018__Boundaries-shp/NUTS_Level_3__January_2018__Boundaries.shp'\n",
@@ -3798,54 +3798,6 @@
"see Lambiotte et al. 2009: belongs to the interval [0, 1], but is now an asymmetric quantity that vanishes only if each community of Pt is the union of communities of Pt"
]
},
-{
-"cell_type": "code",
-"execution_count": 37,
-"metadata": {},
-"outputs": [],
-"source": [
-"def entropy(labels):\n",
-"    \"\"\"Calculates the entropy for a labeling.\n",
-"    Parameters\n",
-"    ----------\n",
-"    labels : int array, shape = [n_samples]\n",
-"        The labels\n",
-"    Notes\n",
-"    -----\n",
-"    The logarithm used is the natural logarithm (base-e).\n",
-"    \"\"\"\n",
-"    if len(labels) == 0:\n",
-"        return 1.0\n",
-"    label_idx = np.unique(labels, return_inverse=True)[1]\n",
-"    pi = np.bincount(label_idx).astype(np.float64)\n",
-"    pi = pi[pi > 0]\n",
-"    pi_sum = np.sum(pi)\n",
-"    # log(a / b) is calculated as log(a) - log(b) to avoid\n",
-"    # possible loss of precision\n",
-"    return -np.sum((pi / pi_sum) * (np.log(pi) - np.log(pi_sum)))\n",
-"\n",
-"def variation_of_information(x, y, normalised=True):\n",
-"    Ex = entropy(x)\n",
-"    Ey = entropy(y)\n",
-"    I = metrics.mutual_info_score(x, y)\n",
-"\n",
-"    if normalised:\n",
-"        return (Ex + Ey - 2*I) / (Ex + Ey - I)\n",
-"    else:\n",
-"        return Ex + Ey - 2*I\n",
-"\n",
-"def normalised_conditional_entropy(x, y):\n",
-"    \"\"\"\n",
-"    H(X|Y) = H(X) - I(X,Y) and we normalise with log(N)\n",
-"    \"\"\"\n",
-"\n",
-"    N = len(x)\n",
-"    Ex = entropy(x)\n",
-"    I = metrics.mutual_info_score(x, y)\n",
-"\n",
-"    return (Ex - I) / np.log(N)"
-]
-},
{
"cell_type": "code",
"execution_count": 38,
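For intuition, a minimal sanity check of the entropy helper being moved (a sketch, not part of this commit; it assumes only numpy):

import numpy as np

labels = np.array([0, 0, 0, 1, 1, 1])
label_idx = np.unique(labels, return_inverse=True)[1]
pi = np.bincount(label_idx).astype(np.float64)  # community sizes: [3., 3.]
pi_sum = np.sum(pi)                             # 6.0
# log(a / b) computed as log(a) - log(b), as in the helper above
H = -np.sum((pi / pi_sum) * (np.log(pi) - np.log(pi_sum)))
print(H)  # log(2) ~ 0.6931 for two equal-sized communities

Keeping the division out of the logarithm limits precision loss when some probabilities are very small.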
12 changes: 12 additions & 0 deletions src/utils.py
@@ -48,3 +48,15 @@ def variation_of_information(x, y, normalised=True):
        return (Ex + Ey - 2 * I) / (Ex + Ey - I)
    else:
        return Ex + Ey - 2 * I
+
+
+def normalised_conditional_entropy(x, y):
+    """
+    H(X|Y) = H(X) - I(X,Y) and we normalise with log(N)
+    """
+
+    N = len(x)
+    Ex = entropy(x)
+    I = metrics.mutual_info_score(x, y)
+
+    return (Ex - I) / np.log(N)
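For reference, a minimal usage sketch of the moved function (not part of this commit; it assumes entropy and variation_of_information sit alongside it in src/utils.py, with numpy imported as np and sklearn.metrics as metrics there, and that src/ is on the import path as in the notebook):

import numpy as np
from utils import normalised_conditional_entropy, variation_of_information

# Two toy partitions of six nodes; y refines x (each community of x
# is a union of communities of y), so H(X|Y) / log(N) vanishes.
x = np.array([0, 0, 0, 1, 1, 1])
y = np.array([0, 0, 1, 2, 2, 3])

print(normalised_conditional_entropy(x, y))  # ~0.0: x is fully determined by y
print(normalised_conditional_entropy(y, x))  # > 0: the measure is asymmetric
print(variation_of_information(x, y))        # normalised VI, lies in [0, 1]

The asymmetry is exactly the property quoted from Lambiotte et al. above: the measure vanishes only when each community of the first partition is a union of communities of the second.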
