update

nwlandry · Aug 26, 2023 · f30392d · f30392d
1 parent 582d4c6
commit f30392d
Show file tree

Hide file tree

Showing 5 changed files with 38 additions and 48 deletions.
diff --git a/Figures/Fig3/email-enron_local_simpliciality.pdf b/Figures/Fig3/email-enron_local_simpliciality.pdf
diff --git a/Figures/Fig3/email-enron_local_simpliciality.png b/Figures/Fig3/email-enron_local_simpliciality.png
diff --git a/README.md b/README.md
@@ -11,6 +11,11 @@ This repository accompanies the preprint, "Simpliciality is a spectrum: Quantify
 ```
 pip install -e .
 ```
+if you wish to be able to edit the package after installing it (an editable install), or
+```
+pip install .
+```
+if you don't.
 * To run the unit tests, run `pytest` in the command line.
 * The package is referenced as `sod` (Simpliciality of Data) when accessing the functionality.
 * There are also distance versions of some simpliciality measures in the code.
@@ -23,8 +28,9 @@ pip install -e .
 * `setup.py` allows users to pip install this package.
 
 ### Notebooks
-* `plot_empiricial_simpliciality.ipynb` generates a plot of the simpliciality for empirical datasets, which is unused in the text.
+* `plot_empiricial_simpliciality.ipynb` generates a plot of the simpliciality for empirical datasets, which is unused in the text.
 * `plot_model_fitting.ipynb` generates Fig. 2 in the text.
 * `local_simpliciality.ipynb` generates Fig. 3 in the text as well as corresponding local measures.
 * `dataset_characteristics.ipynb` generates the results in Table 1 except the measures of simpliciality.
-* `illustrations.ipynb` generates the diagrams used in Fig. 1 in the text.
+* `illustrations.ipynb` generates the diagrams used in Fig. 1 in the text.
+* `simpliciality_correlation.ipynb` generates the correlation coefficients referenced in the text.
diff --git a/local_simpliciality.ipynb b/local_simpliciality.ipynb
@@ -53,15 +53,15 @@
     "nan_color = \"lightgrey\"\n",
     "\n",
     "c_sf = np.empty(H.num_nodes, dtype=object)\n",
-    "for n, s in sf:\n",
+    "for n, s in enumerate(sf):\n",
     "    c_sf[n] = cmap(s)\n",
     "\n",
     "c_es = np.empty(H.num_nodes, dtype=object)\n",
-    "for n, s in es:\n",
+    "for n, s in enumerate(es):\n",
     "    c_es[n] = cmap(s)\n",
     "\n",
     "c_fes = np.empty(H.num_nodes, dtype=object)\n",
-    "for n, s in fes:\n",
+    "for n, s in enumerate(fes):\n",
     "    c_fes[n] = cmap(s)"
    ]
   },
@@ -165,7 +165,7 @@
     "\n",
     "plt.figtext(0.1, 0.9375, \"A\")\n",
     "plt.figtext(0.1, 0.4565, \"B\")\n",
-    "plt.figtext(0.5625, 0.4565, \"A\")\n",
+    "plt.figtext(0.5625, 0.4565, \"C\")\n",
     "\n",
     "plt.tight_layout()\n",
     "plt.savefig(f\"Figures/Fig3/{dataset}_local_simpliciality.png\", dpi=1000)\n",
@@ -245,6 +245,13 @@
     "# plt.savefig(f\"Figures/Fig3/{dataset}_local_simpliciality.pdf\", dpi=1000)\n",
     "plt.show()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

diff --git a/simpliciality_correlation.ipynb b/simpliciality_correlation.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -32,43 +32,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Correlation (spearman) between es and fes: 0.9030, p-value: 0.0003\n",
-      "Correlation (pearson) between es and fes: 0.9027, p-value: 0.0003\n",
-      "Correlation (spearman) between es and sf: 0.8875, p-value: 0.0006\n",
-      "Correlation (pearson) between es and sf: 0.9500, p-value: 0.0000\n",
-      "Correlation (spearman) between fes and sf: 0.9970, p-value: 0.0000\n",
-      "Correlation (pearson) between fes and sf: 0.9718, p-value: 0.0000\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "def load_data(filename=\"Data/empirical_simpliciality.json\"):\n",
-    "    with open(filename, 'r') as f:\n",
+    "    with open(filename, \"r\") as f:\n",
     "        data = json.load(f)\n",
     "    return data\n",
     "\n",
+    "\n",
     "# Use the function to load the data\n",
     "data = load_data()\n",
     "\n",
     "\n",
-    "\n",
     "# Convert the nested dictionary to a pandas DataFrame\n",
-    "df = pd.DataFrame.from_dict(data, orient='index')\n",
+    "df = pd.DataFrame.from_dict(data, orient=\"index\")\n",
     "\n",
     "# Specify the columns to compute correlations for\n",
     "columns = [\"es\", \"fes\", \"sf\"]\n",
     "\n",
     "# Compute the correlations and p-values for each pair of columns\n",
     "results = {}\n",
     "for i in range(len(columns)):\n",
-    "    for j in range(i+1, len(columns)):\n",
+    "    for j in range(i + 1, len(columns)):\n",
     "        col1, col2 = columns[i], columns[j]\n",
     "\n",
     "        # Spearman's correlation\n",
@@ -82,8 +69,9 @@
     "# Print results\n",
     "for key, (corr, p_val) in results.items():\n",
     "    col1, col2, method = key\n",
-    "    print(f\"Correlation ({method}) between {col1} and {col2}: {corr:.4f}, p-value: {p_val:.4f}\")\n",
-    "\n"
+    "    print(\n",
+    "        f\"Correlation ({method}) between {col1} and {col2}: {corr:.4f}, p-value: {p_val:.4f}\"\n",
+    "    )"
    ]
   },
   {
@@ -97,7 +85,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -110,7 +98,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -121,29 +109,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Correlation (spearman) between sf and es: 0.7774, p-value: 0.0000\n",
-      "Correlation (pearson) between sf and es: 0.6888, p-value: 0.0000\n",
-      "Correlation (spearman) between sf and fes: 0.9663, p-value: 0.0000\n",
-      "Correlation (pearson) between sf and fes: 0.8354, p-value: 0.0000\n",
-      "Correlation (spearman) between es and fes: 0.7982, p-value: 0.0000\n",
-      "Correlation (pearson) between es and fes: 0.7537, p-value: 0.0000\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "ls = {\"sf\": sf, \"es\": es, \"fes\": fes}\n",
     "results = {}\n",
     "\n",
     "for i, m1 in enumerate(ls):\n",
     "    for j, m2 in enumerate(ls):\n",
     "        if i < j:\n",
+    "            # the measures are NaN in the same places\n",
     "            s1 = ls[m1][~np.isnan(ls[m1])]\n",
     "            s2 = ls[m2][~np.isnan(ls[m2])]\n",
     "\n",
@@ -158,13 +134,14 @@
     "# Print results\n",
     "for key, (corr, p_val) in results.items():\n",
     "    col1, col2, method = key\n",
-    "    print(f\"Correlation ({method}) between {col1} and {col2}: {corr:.4f}, p-value: {p_val:.4f}\")\n",
-    "\n"
+    "    print(\n",
+    "        f\"Correlation ({method}) between {col1} and {col2}: {corr:.4f}, p-value: {p_val:.4f}\"\n",
+    "    )"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [