From d3b728c1f32b1adefcdec34515d51dbc854fa035 Mon Sep 17 00:00:00 2001
From: d-schindler <60650591+d-schindler@users.noreply.github.com>
Date: Fri, 8 Nov 2024 10:42:31 +0000
Subject: [PATCH] renamed block_detection_curve to block_nvi

---
 src/pygenstability/optimal_scales.py        | 29 ++++++++-----
 src/pygenstability/plotting.py              | 47 +++++++++++++++------
 src/pygenstability/pygenstability.py        |  2 +-
 tests/data/test_dataclustering_default.yaml |  2 +-
 tests/data/test_dataclustering_knn.yaml     |  2 +-
 5 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/src/pygenstability/optimal_scales.py b/src/pygenstability/optimal_scales.py
index be1a267..339ea54 100644
--- a/src/pygenstability/optimal_scales.py
+++ b/src/pygenstability/optimal_scales.py
@@ -33,14 +33,21 @@ def _pool2d_nvi(A, kernel_size, stride, padding=0):
     )
     shape_w = (output_shape[0], output_shape[1], kernel_size, kernel_size)
     # pylint: disable=unsubscriptable-object
-    strides_w = (stride * A.strides[0], stride * A.strides[1], A.strides[0], A.strides[1])
+    strides_w = (
+        stride * A.strides[0],
+        stride * A.strides[1],
+        A.strides[0],
+        A.strides[1],
+    )
     A_w = as_strided(A, shape_w, strides_w)
 
     # Return the result of pooling
     return np.nanmean(A_w, axis=(2, 3))
 
 
-def identify_optimal_scales(results, kernel_size=3, window_size=3, max_nvi=1, basin_radius=1):
+def identify_optimal_scales(
+    results, kernel_size=3, window_size=3, max_nvi=1, basin_radius=1
+):
     """Identifies optimal scales in Markov Stability [1]_.
 
     Robust scales are found in a sequential way. We first search for large diagonal blocks
@@ -57,7 +64,7 @@ def identify_optimal_scales(results, kernel_size=3, window_size=3, max_nvi=1, ba
         basin_radius (int): radius of basin around local minima of the pooled diagonal
 
     Returns:
-        result dictionary with two new keys: 'selected_partitions' and 'block_detection_curve'
+        result dictionary with two new keys: 'selected_partitions' and 'block_nvi'
 
     References:
         .. [1] D. J. Schindler, J. Clarke, and M. Barahona, 'Multiscale Mobility Patterns and
@@ -74,22 +81,24 @@ def identify_optimal_scales(results, kernel_size=3, window_size=3, max_nvi=1, ba
     diagonal = np.diag(nvi_tt_pooled)[: len(nvi_t)]
 
     # smooth diagonal with moving window
-    block_detection_curve = np.roll(
-        np.asarray(pd.Series(diagonal).rolling(window=window_size, win_type="triang").mean()),
+    block_nvi = np.roll(
+        np.asarray(
+            pd.Series(diagonal).rolling(window=window_size, win_type="triang").mean()
+        ),
         -int(window_size / 2),
     )
-    results["block_detection_curve"] = block_detection_curve
+    results["block_nvi"] = block_nvi
 
     # find local minima on diagonal of pooled NVI(s,s')
-    basin_centers, _ = find_peaks(-block_detection_curve, height=-max_nvi)
+    basin_centers, _ = find_peaks(-block_nvi, height=-max_nvi)
 
     # add robust scales located in large 0 margins
-    not_nan_ind = np.argwhere(~np.isnan(block_detection_curve)).flatten()
+    not_nan_ind = np.argwhere(~np.isnan(block_nvi)).flatten()
     if (
         np.count_nonzero(
             np.around(
-                block_detection_curve[not_nan_ind[0] : not_nan_ind[0] + 2 * basin_radius + 1], 5
+                block_nvi[not_nan_ind[0] : not_nan_ind[0] + 2 * basin_radius + 1], 5
             )
         )
         == 0
@@ -99,7 +108,7 @@ def identify_optimal_scales(results, kernel_size=3, window_size=3, max_nvi=1, ba
     if (
         np.count_nonzero(
             np.around(
-                block_detection_curve[not_nan_ind[-1] - 2 * basin_radius : not_nan_ind[-1] + 1], 5
+                block_nvi[not_nan_ind[-1] - 2 * basin_radius : not_nan_ind[-1] + 1], 5
             )
         )
         == 0
diff --git a/src/pygenstability/plotting.py b/src/pygenstability/plotting.py
index 74a0451..f733e72 100644
--- a/src/pygenstability/plotting.py
+++ b/src/pygenstability/plotting.py
@@ -9,7 +9,9 @@
 try:
     import networkx as nx
 except ImportError:  # pragma: no cover
-    print('Please install networkx via pip install "pygenstability[networkx]" for full plotting.')
+    print(
+        'Please install networkx via pip install "pygenstability[networkx]" for full plotting.'
+    )
 
 import numpy as np
 from matplotlib import gridspec
@@ -48,7 +50,9 @@ def plot_scan(
         plotly_filename (str): filename of .html figure from plotly
     """
     if len(all_results["scales"]) == 1:  # pragma: no cover
-        L.info("Cannot plot the results if only one scale point, we display the result instead:")
+        L.info(
+            "Cannot plot the results if only one scale point, we display the result instead:"
+        )
         L.info(all_results)
         return None
 
@@ -263,7 +267,11 @@ def plot_optimal_partitions(
     for optimal_scale_id in selected_scales:
         plot_single_partition(
-            graph, all_results, optimal_scale_id, edge_color=edge_color, edge_width=edge_width
+            graph,
+            all_results,
+            optimal_scale_id,
+            edge_color=edge_color,
+            edge_width=edge_width,
         )
         plt.savefig(f"{folder}/scale_{optimal_scale_id}{ext}", bbox_inches="tight")
         if show:  # pragma: no cover
@@ -271,7 +279,12 @@ def plot_communities(
-    graph, all_results, folder="communities", edge_color="0.5", edge_width=0.5, ext=".pdf"
+    graph,
+    all_results,
+    folder="communities",
+    edge_color="0.5",
+    edge_width=0.5,
+    ext=".pdf",
 ):
     """Plot the community structures at each scale in a folder.
@@ -293,12 +306,16 @@ def plot_communities(
         plot_single_partition(
             graph, all_results, scale_id, edge_color=edge_color, edge_width=edge_width
         )
-        plt.savefig(os.path.join(folder, "scale_" + str(scale_id) + ext), bbox_inches="tight")
+        plt.savefig(
+            os.path.join(folder, "scale_" + str(scale_id) + ext), bbox_inches="tight"
+        )
         plt.close()
     matplotlib.use(mpl_backend)
 
 
-def plot_communities_matrix(graph, all_results, folder="communities_matrix", ext=".pdf"):
+def plot_communities_matrix(
+    graph, all_results, folder="communities_matrix", ext=".pdf"
+):
     """Plot communities at all scales in matrix form.
 
     Args:
@@ -327,7 +344,9 @@ def plot_communities_matrix(graph, all_results, folder="communities_matrix", ext
             plt.plot((lines[i + 1], lines[i + 1]), (lines[i + 1], lines[i]), c="k")
             plt.plot((lines[i + 1], lines[i]), (lines[i + 1], lines[i + 1]), c="k")
 
-        plt.savefig(os.path.join(folder, "scale_" + str(scale_id) + ext), bbox_inches="tight")
+        plt.savefig(
+            os.path.join(folder, "scale_" + str(scale_id) + ext), bbox_inches="tight"
+        )
 
 
 def _get_scales(all_results, scale_axis=True):
@@ -341,7 +360,9 @@ def _get_scales(all_results, scale_axis=True):
 
 def _plot_number_comm(all_results, ax, scales):
     """Plot number of communities."""
-    ax.plot(scales, all_results["number_of_communities"], "-", c="C3", label="size", lw=2.0)
+    ax.plot(
+        scales, all_results["number_of_communities"], "-", c="C3", label="size", lw=2.0
+    )
     ax.set_ylim(0, 1.1 * max(all_results["number_of_communities"]))
     ax.set_ylabel("# clusters", color="C3")
     ax.tick_params("y", colors="C3")
@@ -349,7 +370,9 @@ def _plot_number_comm(all_results, ax, scales):
 
 def _plot_ttprime(all_results, ax, scales):
     """Plot ttprime."""
-    contourf_ = ax.contourf(scales, scales, all_results["ttprime"], cmap="YlOrBr_r", extend="min")
+    contourf_ = ax.contourf(
+        scales, scales, all_results["ttprime"], cmap="YlOrBr_r", extend="min"
+    )
     ax.set_ylabel(r"$log_{10}(t^\prime)$")
     ax.yaxis.tick_left()
     ax.yaxis.set_label_position("left")
@@ -394,7 +417,7 @@ def _plot_optimal_scales(all_results, ax, scales, ax1, ax2):
     """Plot stability."""
     ax.plot(
         scales,
-        all_results["block_detection_curve"],
+        all_results["block_nvi"],
         "-",
         lw=2.0,
         c="C4",
     )
@@ -402,7 +425,7 @@ def _plot_optimal_scales(all_results, ax, scales, ax1, ax2):
     ax.plot(
         scales[all_results["selected_partitions"]],
-        all_results["block_detection_curve"][all_results["selected_partitions"]],
+        all_results["block_nvi"][all_results["selected_partitions"]],
         "o",
         lw=2.0,
         c="C4",
     )
@@ -456,7 +479,7 @@ def plot_scan_plt(all_results, scale_axis=True, figure_name="scan_results.svg"):
     _plot_number_comm(all_results, ax=ax3, scales=scales)
     axes.append(ax3)
 
-    if "block_detection_curve" in all_results:
+    if "block_nvi" in all_results:
         ax4 = plt.subplot(gs[2, 0])
         _plot_optimal_scales(all_results, ax=ax4, scales=scales, ax1=ax1, ax2=ax2)
         axes.append(ax4)
diff --git a/src/pygenstability/pygenstability.py b/src/pygenstability/pygenstability.py
index 24bcce9..0e28498 100644
--- a/src/pygenstability/pygenstability.py
+++ b/src/pygenstability/pygenstability.py
@@ -210,7 +210,7 @@ def run(
             optimisation evaluations at each scale (included if with_all_tries==True)
         - 'NVI': NVI(t) at each scale
         - 'ttprime': NVI(t,tprime) matrix
-        - 'block_detection_curve': block detection curve (included if with_optimal_scales==True)
+        - 'block_nvi': block NVI curve (included if with_optimal_scales==True)
         - 'selected_partitions': selected partitions (included if with_optimal_scales==True)
     """
diff --git a/tests/data/test_dataclustering_default.yaml b/tests/data/test_dataclustering_default.yaml
index 7b9f0ae..3382f15 100644
--- a/tests/data/test_dataclustering_default.yaml
+++ b/tests/data/test_dataclustering_default.yaml
@@ -2400,7 +2400,7 @@ labels:
 - 31.0
 - 31.0
 results:
-  block_detection_curve:
+  block_nvi:
   - .nan
   - 0.0680975542371894
   - 0.05125455197832432
diff --git a/tests/data/test_dataclustering_knn.yaml b/tests/data/test_dataclustering_knn.yaml
index 712f43b..202d326 100644
--- a/tests/data/test_dataclustering_knn.yaml
+++ b/tests/data/test_dataclustering_knn.yaml
@@ -1800,7 +1800,7 @@ labels:
 - 25.0
 - 25.0
 results:
-  block_detection_curve:
+  block_nvi:
   - .nan
   - 0.10668153950024195
   - 0.08542249390900095
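
Usage note (not part of the patch itself): after this rename, downstream code that reads results["block_detection_curve"] must read results["block_nvi"] instead. The sketch below is a minimal, hedged example of the new key; it assumes pygenstability exposes run() at the package level, that run() accepts a scipy sparse adjacency matrix, and that with_optimal_scales behaves as described in the run() docstring above. The karate-club graph is purely illustrative.

    import networkx as nx
    import numpy as np
    import scipy.sparse as sp

    from pygenstability import run

    # Small illustrative graph and its sparse adjacency matrix (assumed input format).
    graph = nx.karate_club_graph()
    adjacency = sp.csr_matrix(nx.to_numpy_array(graph))

    # Markov Stability scan; with_optimal_scales=True attaches the scale-selection
    # output ('block_nvi' and 'selected_partitions') to the results dictionary.
    results = run(adjacency, with_optimal_scales=True)

    # The block NVI curve now lives under 'block_nvi' (formerly 'block_detection_curve');
    # the fallback keeps results produced by older versions readable.
    block_nvi = results.get("block_nvi", results.get("block_detection_curve"))
    selected = results["selected_partitions"]

    print("selected scale indices:", selected)
    print("block NVI at selected scales:", np.asarray(block_nvi)[selected])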