From 4ba52d8006969ae9d48b7f38a707d491e8c0cbf9 Mon Sep 17 00:00:00 2001 From: kapoorlab Date: Tue, 23 Jan 2024 22:52:32 +0100 Subject: [PATCH] lv --- src/napatrackmater/Trackvector.py | 1023 ++++++++++++++--------------- src/napatrackmater/_version.py | 4 +- 2 files changed, 500 insertions(+), 527 deletions(-) diff --git a/src/napatrackmater/Trackvector.py b/src/napatrackmater/Trackvector.py index e745cb20..7a187885 100644 --- a/src/napatrackmater/Trackvector.py +++ b/src/napatrackmater/Trackvector.py @@ -590,15 +590,23 @@ def get_shape_dynamic_feature_dataframe(self): [global_shape_dynamic_dataframe, shape_dynamic_dataframe], ignore_index=True, ) - global_shape_dynamic_dataframe["TrackMate Track ID"] = global_shape_dynamic_dataframe["Track ID"].map(self.tracklet_id_to_trackmate_id) - trackmate_ids = global_shape_dynamic_dataframe['TrackMate Track ID'] + global_shape_dynamic_dataframe[ + "TrackMate Track ID" + ] = global_shape_dynamic_dataframe["Track ID"].map( + self.tracklet_id_to_trackmate_id + ) + trackmate_ids = global_shape_dynamic_dataframe["TrackMate Track ID"] track_duration_dict = {} - for trackmate_id in trackmate_ids : + for trackmate_id in trackmate_ids: track_properties = self.unique_track_properties[trackmate_id] - total_track_duration = track_properties[:,18][0] + total_track_duration = track_properties[:, 18][0] track_duration_dict[trackmate_id] = int(total_track_duration) - global_shape_dynamic_dataframe["Track Duration"] = global_shape_dynamic_dataframe["TrackMate Track ID"].map(track_duration_dict) - + global_shape_dynamic_dataframe[ + "Track Duration" + ] = global_shape_dynamic_dataframe["TrackMate Track ID"].map( + track_duration_dict + ) + global_shape_dynamic_dataframe = global_shape_dynamic_dataframe.sort_values( by=["Track ID"] ) @@ -742,11 +750,13 @@ def create_analysis_tracklets( ): training_tracklets = {} if t_minus is not None and t_plus is not None: - time_mask = (global_shape_dynamic_dataframe['t'] >= t_minus) & (global_shape_dynamic_dataframe['t'] <= t_plus ) - local_shape_dynamic_dataframe = global_shape_dynamic_dataframe[time_mask] + time_mask = (global_shape_dynamic_dataframe["t"] >= t_minus) & ( + global_shape_dynamic_dataframe["t"] <= t_plus + ) + local_shape_dynamic_dataframe = global_shape_dynamic_dataframe[time_mask] else: local_shape_dynamic_dataframe = global_shape_dynamic_dataframe - + subset_dividing = local_shape_dynamic_dataframe[ local_shape_dynamic_dataframe["Dividing"] == 1 ] @@ -769,7 +779,6 @@ def create_analysis_tracklets( list(non_dividing_track_ids), non_dividing_count ) - for track_id in dividing_track_ids: subset_dividing = subset_dividing.loc[ local_shape_dynamic_dataframe.duplicated( @@ -777,28 +786,25 @@ def create_analysis_tracklets( ) ] track_data = local_shape_dynamic_dataframe[ - (local_shape_dynamic_dataframe["Track ID"] == track_id) - ].sort_values(by="t") + (local_shape_dynamic_dataframe["Track ID"] == track_id) + ].sort_values(by="t") if track_data.shape[0] > 0: - training_tracklets = _iterate_over_tracklets( - track_data, training_tracklets, track_id - ) + training_tracklets = _iterate_over_tracklets( + track_data, training_tracklets, track_id + ) for track_id in non_dividing_track_ids: track_data = local_shape_dynamic_dataframe[ - (local_shape_dynamic_dataframe["Track ID"] == track_id) - ].sort_values(by="t") + (local_shape_dynamic_dataframe["Track ID"] == track_id) + ].sort_values(by="t") if track_data.shape[0] > 0: - training_tracklets = _iterate_over_tracklets( - track_data, training_tracklets, track_id - ) + training_tracklets = _iterate_over_tracklets( + track_data, training_tracklets, track_id + ) return training_tracklets, local_shape_dynamic_dataframe - - - def z_score_normalization(data): normalized_data = data return normalized_data @@ -1398,171 +1404,188 @@ def calculate_cluster_centroids(data, labels): centroids.append(centroid) return np.array(centroids) + def simple_unsupervised_clustering( full_dataframe, csv_file_name, analysis_vectors, cluster_threshold=3, - t_delta = 10, + t_delta=10, metric="euclidean", method="centroid", criterion="distance", ): - - - csv_file_name_original = csv_file_name - analysis_track_ids = [] - shape_dynamic_covariance_matrix = [] - shape_covariance_matrix = [] - dynamic_covariance_matrix = [] - for track_id, ( - shape_dynamic_dataframe_list, - shape_dataframe_list, - dynamic_dataframe_list, - full_dataframe_list, - ) in analysis_vectors.items(): - shape_dynamic_track_array = np.array( - [ - [item for item in record.values()] - for record in shape_dynamic_dataframe_list - ] - ) - shape_track_array = np.array( - [[item for item in record.values()] for record in shape_dataframe_list] - ) - dynamic_track_array = np.array( - [[item for item in record.values()] for record in dynamic_dataframe_list] - ) - assert ( - shape_dynamic_track_array.shape[0] - == shape_track_array.shape[0] - == dynamic_track_array.shape[0] - ), "Shape dynamic, shape and dynamic track arrays must have the same length." - if shape_dynamic_track_array.shape[0] > 1: - - covariance_computation_shape_dynamic = compute_raw_matrix(shape_dynamic_track_array, t_delta = t_delta) - - covaraince_computation_shape = compute_raw_matrix( - shape_track_array, t_delta = t_delta - ) - - covaraince_computation_dynamic = compute_raw_matrix( - dynamic_track_array, t_delta = t_delta - ) - if covariance_computation_shape_dynamic is not None and covaraince_computation_shape is not None and covaraince_computation_dynamic is not None: - - shape_dynamic_eigenvectors = covariance_computation_shape_dynamic - shape_eigenvectors = covaraince_computation_shape - dynamic_eigenvectors = covaraince_computation_dynamic - shape_dynamic_covariance_matrix.append(shape_dynamic_eigenvectors) - shape_covariance_matrix.append(shape_eigenvectors) - dynamic_covariance_matrix.append(dynamic_eigenvectors) - analysis_track_ids.append(track_id) - if len(shape_dynamic_covariance_matrix) > 0 and len(shape_covariance_matrix) > 0 and len(dynamic_covariance_matrix) > 0: - shape_dynamic_covariance_3d = np.dstack(shape_dynamic_covariance_matrix) - shape_covariance_3d = np.dstack(shape_covariance_matrix) - dynamic_covariance_3d = np.dstack(dynamic_covariance_matrix) - - shape_dynamic_covariance_matrix = np.mean(shape_dynamic_covariance_matrix, axis=0) - shape_covariance_matrix = np.mean(shape_covariance_matrix, axis=0) - dynamic_covariance_matrix = np.mean(dynamic_covariance_matrix, axis=0) - - shape_dynamic_covariance_2d = shape_dynamic_covariance_3d.reshape( - len(analysis_track_ids), -1 - ) - shape_covariance_2d = shape_covariance_3d.reshape(len(analysis_track_ids), -1) - dynamic_covariance_2d = dynamic_covariance_3d.reshape(len(analysis_track_ids), -1) - track_arrays_array = [ - shape_dynamic_covariance_matrix, - shape_covariance_matrix, - dynamic_covariance_matrix, - ] + csv_file_name_original = csv_file_name + analysis_track_ids = [] + shape_dynamic_covariance_matrix = [] + shape_covariance_matrix = [] + dynamic_covariance_matrix = [] + for track_id, ( + shape_dynamic_dataframe_list, + shape_dataframe_list, + dynamic_dataframe_list, + full_dataframe_list, + ) in analysis_vectors.items(): + shape_dynamic_track_array = np.array( + [ + [item for item in record.values()] + for record in shape_dynamic_dataframe_list + ] + ) + shape_track_array = np.array( + [[item for item in record.values()] for record in shape_dataframe_list] + ) + dynamic_track_array = np.array( + [[item for item in record.values()] for record in dynamic_dataframe_list] + ) + assert ( + shape_dynamic_track_array.shape[0] + == shape_track_array.shape[0] + == dynamic_track_array.shape[0] + ), "Shape dynamic, shape and dynamic track arrays must have the same length." + if shape_dynamic_track_array.shape[0] > 1: - track_arrays_array_names = ["shape_dynamic", "shape", "dynamic"] - clusterable_track_arrays = [ - shape_dynamic_covariance_2d, - shape_covariance_2d, - dynamic_covariance_2d, - ] + covariance_computation_shape_dynamic = compute_raw_matrix( + shape_dynamic_track_array, t_delta=t_delta + ) - for track_arrays in track_arrays_array: - clusterable_track_array = clusterable_track_arrays[ - track_arrays_array.index(track_arrays) - ] - shape_dynamic_cosine_distance = pdist(clusterable_track_array, metric=metric) + covaraince_computation_shape = compute_raw_matrix( + shape_track_array, t_delta=t_delta + ) - shape_dynamic_linkage_matrix = linkage( - shape_dynamic_cosine_distance, method=method - ) - shape_dynamic_cluster_labels = fcluster( - shape_dynamic_linkage_matrix, cluster_threshold, criterion=criterion - ) + covaraince_computation_dynamic = compute_raw_matrix( + dynamic_track_array, t_delta=t_delta + ) + if ( + covariance_computation_shape_dynamic is not None + and covaraince_computation_shape is not None + and covaraince_computation_dynamic is not None + ): - cluster_centroids = calculate_cluster_centroids( - clusterable_track_array, shape_dynamic_cluster_labels - ) - silhouette = silhouette_score( - clusterable_track_array, shape_dynamic_cluster_labels, metric=metric - ) - wcss_value = calculate_wcss( - clusterable_track_array, shape_dynamic_cluster_labels, cluster_centroids - ) + shape_dynamic_eigenvectors = covariance_computation_shape_dynamic + shape_eigenvectors = covaraince_computation_shape + dynamic_eigenvectors = covaraince_computation_dynamic + shape_dynamic_covariance_matrix.append(shape_dynamic_eigenvectors) + shape_covariance_matrix.append(shape_eigenvectors) + dynamic_covariance_matrix.append(dynamic_eigenvectors) + analysis_track_ids.append(track_id) + if ( + len(shape_dynamic_covariance_matrix) > 0 + and len(shape_covariance_matrix) > 0 + and len(dynamic_covariance_matrix) > 0 + ): + shape_dynamic_covariance_3d = np.dstack(shape_dynamic_covariance_matrix) + shape_covariance_3d = np.dstack(shape_covariance_matrix) + dynamic_covariance_3d = np.dstack(dynamic_covariance_matrix) - silhouette_file_name = os.path.join( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + f"_silhouette_{metric}_{cluster_threshold}.npy" - ) - np.save(silhouette_file_name, silhouette) + shape_dynamic_covariance_matrix = np.mean( + shape_dynamic_covariance_matrix, axis=0 + ) + shape_covariance_matrix = np.mean(shape_covariance_matrix, axis=0) + dynamic_covariance_matrix = np.mean(dynamic_covariance_matrix, axis=0) - wcss_file_name = os.path.join( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + f"_wcss_{metric}_{cluster_threshold}.npy" - ) - np.save(wcss_file_name, wcss_value) - track_id_to_cluster = { - track_id: cluster_label - for track_id, cluster_label in zip( - analysis_track_ids, shape_dynamic_cluster_labels - ) - } - full_dataframe["Cluster"] = full_dataframe["Track ID"].map(track_id_to_cluster) - result_dataframe = full_dataframe[["Track ID", "t", "z", "y", "x", "Cluster"]] - csv_file_name = ( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + ".csv" - ) + shape_dynamic_covariance_2d = shape_dynamic_covariance_3d.reshape( + len(analysis_track_ids), -1 + ) + shape_covariance_2d = shape_covariance_3d.reshape(len(analysis_track_ids), -1) + dynamic_covariance_2d = dynamic_covariance_3d.reshape( + len(analysis_track_ids), -1 + ) - if os.path.exists(csv_file_name): - os.remove(csv_file_name) - result_dataframe.to_csv(csv_file_name, index=False) + track_arrays_array = [ + shape_dynamic_covariance_matrix, + shape_covariance_matrix, + dynamic_covariance_matrix, + ] - mean_matrix_file_name = ( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + f"_{metric}_covariance.npy" - ) - np.save(mean_matrix_file_name, track_arrays) + track_arrays_array_names = ["shape_dynamic", "shape", "dynamic"] + clusterable_track_arrays = [ + shape_dynamic_covariance_2d, + shape_covariance_2d, + dynamic_covariance_2d, + ] - linkage_npy_file_name = ( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + f"_{metric}_linkage.npy" - ) - np.save(linkage_npy_file_name, shape_dynamic_linkage_matrix) + for track_arrays in track_arrays_array: + clusterable_track_array = clusterable_track_arrays[ + track_arrays_array.index(track_arrays) + ] + shape_dynamic_cosine_distance = pdist( + clusterable_track_array, metric=metric + ) - cluster_labels_npy_file_name = ( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + f"_{metric}_cluster_labels.npy" - ) - np.save(cluster_labels_npy_file_name, shape_dynamic_cluster_labels) + shape_dynamic_linkage_matrix = linkage( + shape_dynamic_cosine_distance, method=method + ) + shape_dynamic_cluster_labels = fcluster( + shape_dynamic_linkage_matrix, cluster_threshold, criterion=criterion + ) - + cluster_centroids = calculate_cluster_centroids( + clusterable_track_array, shape_dynamic_cluster_labels + ) + silhouette = silhouette_score( + clusterable_track_array, shape_dynamic_cluster_labels, metric=metric + ) + wcss_value = calculate_wcss( + clusterable_track_array, shape_dynamic_cluster_labels, cluster_centroids + ) + silhouette_file_name = os.path.join( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + f"_silhouette_{metric}_{cluster_threshold}.npy" + ) + np.save(silhouette_file_name, silhouette) + + wcss_file_name = os.path.join( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + f"_wcss_{metric}_{cluster_threshold}.npy" + ) + np.save(wcss_file_name, wcss_value) + track_id_to_cluster = { + track_id: cluster_label + for track_id, cluster_label in zip( + analysis_track_ids, shape_dynamic_cluster_labels + ) + } + full_dataframe["Cluster"] = full_dataframe["Track ID"].map( + track_id_to_cluster + ) + result_dataframe = full_dataframe[ + ["Track ID", "t", "z", "y", "x", "Cluster"] + ] + csv_file_name = ( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + ".csv" + ) + + if os.path.exists(csv_file_name): + os.remove(csv_file_name) + result_dataframe.to_csv(csv_file_name, index=False) + + mean_matrix_file_name = ( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + f"_{metric}_covariance.npy" + ) + np.save(mean_matrix_file_name, track_arrays) + + linkage_npy_file_name = ( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + f"_{metric}_linkage.npy" + ) + np.save(linkage_npy_file_name, shape_dynamic_linkage_matrix) + + cluster_labels_npy_file_name = ( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + f"_{metric}_cluster_labels.npy" + ) + np.save(cluster_labels_npy_file_name, shape_dynamic_cluster_labels) def unsupervised_clustering( @@ -1574,162 +1597,180 @@ def unsupervised_clustering( method="ward", criterion="maxclust", ): - - - csv_file_name_original = csv_file_name - analysis_track_ids = [] - shape_dynamic_covariance_matrix = [] - shape_covariance_matrix = [] - dynamic_covariance_matrix = [] - for track_id, ( - shape_dynamic_dataframe_list, - shape_dataframe_list, - dynamic_dataframe_list, - full_dataframe_list, - ) in analysis_vectors.items(): - shape_dynamic_track_array = np.array( - [ - [item for item in record.values()] - for record in shape_dynamic_dataframe_list - ] - ) - shape_track_array = np.array( - [[item for item in record.values()] for record in shape_dataframe_list] - ) - dynamic_track_array = np.array( - [[item for item in record.values()] for record in dynamic_dataframe_list] - ) - assert ( - shape_dynamic_track_array.shape[0] - == shape_track_array.shape[0] - == dynamic_track_array.shape[0] - ), "Shape dynamic, shape and dynamic track arrays must have the same length." - if shape_dynamic_track_array.shape[0] > 1: - - covariance_computation_shape_dynamic = compute_covariance_matrix(shape_dynamic_track_array) - - covaraince_computation_shape = compute_covariance_matrix( - shape_track_array - ) - - covaraince_computation_dynamic = compute_covariance_matrix( - dynamic_track_array - ) - if covariance_computation_shape_dynamic is not None and covaraince_computation_shape is not None and covaraince_computation_dynamic is not None: - ( - shape_dynamic_covariance, - shape_dynamic_eigenvectors, - ) = covariance_computation_shape_dynamic - (shape_covariance, shape_eigenvectors ) = covaraince_computation_shape - ( dynamic_covaraince, dynamic_eigenvectors ) = covaraince_computation_dynamic - shape_dynamic_covariance_matrix.append(shape_dynamic_eigenvectors) - shape_covariance_matrix.append(shape_eigenvectors) - dynamic_covariance_matrix.append(dynamic_eigenvectors) - analysis_track_ids.append(track_id) - if len(shape_dynamic_covariance_matrix) > 0 and len(shape_covariance_matrix) > 0 and len(dynamic_covariance_matrix) > 0: - shape_dynamic_covariance_3d = np.dstack(shape_dynamic_covariance_matrix) - shape_covariance_3d = np.dstack(shape_covariance_matrix) - dynamic_covariance_3d = np.dstack(dynamic_covariance_matrix) - - shape_dynamic_covariance_matrix = np.mean(shape_dynamic_covariance_matrix, axis=0) - shape_covariance_matrix = np.mean(shape_covariance_matrix, axis=0) - dynamic_covariance_matrix = np.mean(dynamic_covariance_matrix, axis=0) - - shape_dynamic_covariance_2d = shape_dynamic_covariance_3d.reshape( - len(analysis_track_ids), -1 - ) - shape_covariance_2d = shape_covariance_3d.reshape(len(analysis_track_ids), -1) - dynamic_covariance_2d = dynamic_covariance_3d.reshape(len(analysis_track_ids), -1) - track_arrays_array = [ - shape_dynamic_covariance_matrix, - shape_covariance_matrix, - dynamic_covariance_matrix, - ] + csv_file_name_original = csv_file_name + analysis_track_ids = [] + shape_dynamic_covariance_matrix = [] + shape_covariance_matrix = [] + dynamic_covariance_matrix = [] + for track_id, ( + shape_dynamic_dataframe_list, + shape_dataframe_list, + dynamic_dataframe_list, + full_dataframe_list, + ) in analysis_vectors.items(): + shape_dynamic_track_array = np.array( + [ + [item for item in record.values()] + for record in shape_dynamic_dataframe_list + ] + ) + shape_track_array = np.array( + [[item for item in record.values()] for record in shape_dataframe_list] + ) + dynamic_track_array = np.array( + [[item for item in record.values()] for record in dynamic_dataframe_list] + ) + assert ( + shape_dynamic_track_array.shape[0] + == shape_track_array.shape[0] + == dynamic_track_array.shape[0] + ), "Shape dynamic, shape and dynamic track arrays must have the same length." + if shape_dynamic_track_array.shape[0] > 1: - track_arrays_array_names = ["shape_dynamic", "shape", "dynamic"] - clusterable_track_arrays = [ - shape_dynamic_covariance_2d, - shape_covariance_2d, - dynamic_covariance_2d, - ] + covariance_computation_shape_dynamic = compute_covariance_matrix( + shape_dynamic_track_array + ) - for track_arrays in track_arrays_array: - clusterable_track_array = clusterable_track_arrays[ - track_arrays_array.index(track_arrays) - ] - shape_dynamic_cosine_distance = pdist(clusterable_track_array, metric=metric) + covaraince_computation_shape = compute_covariance_matrix(shape_track_array) - shape_dynamic_linkage_matrix = linkage( - shape_dynamic_cosine_distance, method=method - ) - shape_dynamic_cluster_labels = fcluster( - shape_dynamic_linkage_matrix, cluster_threshold, criterion=criterion - ) + covaraince_computation_dynamic = compute_covariance_matrix( + dynamic_track_array + ) + if ( + covariance_computation_shape_dynamic is not None + and covaraince_computation_shape is not None + and covaraince_computation_dynamic is not None + ): + ( + shape_dynamic_covariance, + shape_dynamic_eigenvectors, + ) = covariance_computation_shape_dynamic + (shape_covariance, shape_eigenvectors) = covaraince_computation_shape + ( + dynamic_covaraince, + dynamic_eigenvectors, + ) = covaraince_computation_dynamic + shape_dynamic_covariance_matrix.append(shape_dynamic_eigenvectors) + shape_covariance_matrix.append(shape_eigenvectors) + dynamic_covariance_matrix.append(dynamic_eigenvectors) + analysis_track_ids.append(track_id) + if ( + len(shape_dynamic_covariance_matrix) > 0 + and len(shape_covariance_matrix) > 0 + and len(dynamic_covariance_matrix) > 0 + ): + shape_dynamic_covariance_3d = np.dstack(shape_dynamic_covariance_matrix) + shape_covariance_3d = np.dstack(shape_covariance_matrix) + dynamic_covariance_3d = np.dstack(dynamic_covariance_matrix) - cluster_centroids = calculate_cluster_centroids( - clusterable_track_array, shape_dynamic_cluster_labels - ) - silhouette = silhouette_score( - clusterable_track_array, shape_dynamic_cluster_labels, metric=metric - ) - wcss_value = calculate_wcss( - clusterable_track_array, shape_dynamic_cluster_labels, cluster_centroids - ) + shape_dynamic_covariance_matrix = np.mean( + shape_dynamic_covariance_matrix, axis=0 + ) + shape_covariance_matrix = np.mean(shape_covariance_matrix, axis=0) + dynamic_covariance_matrix = np.mean(dynamic_covariance_matrix, axis=0) - silhouette_file_name = os.path.join( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + f"_silhouette_{metric}_{cluster_threshold}.npy" - ) - np.save(silhouette_file_name, silhouette) + shape_dynamic_covariance_2d = shape_dynamic_covariance_3d.reshape( + len(analysis_track_ids), -1 + ) + shape_covariance_2d = shape_covariance_3d.reshape(len(analysis_track_ids), -1) + dynamic_covariance_2d = dynamic_covariance_3d.reshape( + len(analysis_track_ids), -1 + ) - wcss_file_name = os.path.join( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + f"_wcss_{metric}_{cluster_threshold}.npy" - ) - np.save(wcss_file_name, wcss_value) - track_id_to_cluster = { - track_id: cluster_label - for track_id, cluster_label in zip( - analysis_track_ids, shape_dynamic_cluster_labels - ) - } - full_dataframe["Cluster"] = full_dataframe["Track ID"].map(track_id_to_cluster) - result_dataframe = full_dataframe[["Track ID", "t", "z", "y", "x", "Cluster"]] - csv_file_name = ( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + ".csv" - ) + track_arrays_array = [ + shape_dynamic_covariance_matrix, + shape_covariance_matrix, + dynamic_covariance_matrix, + ] - if os.path.exists(csv_file_name): - os.remove(csv_file_name) - result_dataframe.to_csv(csv_file_name, index=False) + track_arrays_array_names = ["shape_dynamic", "shape", "dynamic"] + clusterable_track_arrays = [ + shape_dynamic_covariance_2d, + shape_covariance_2d, + dynamic_covariance_2d, + ] - mean_matrix_file_name = ( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + f"_{metric}_covariance.npy" - ) - np.save(mean_matrix_file_name, track_arrays) + for track_arrays in track_arrays_array: + clusterable_track_array = clusterable_track_arrays[ + track_arrays_array.index(track_arrays) + ] + shape_dynamic_cosine_distance = pdist( + clusterable_track_array, metric=metric + ) - linkage_npy_file_name = ( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + f"_{metric}_linkage.npy" - ) - np.save(linkage_npy_file_name, shape_dynamic_linkage_matrix) + shape_dynamic_linkage_matrix = linkage( + shape_dynamic_cosine_distance, method=method + ) + shape_dynamic_cluster_labels = fcluster( + shape_dynamic_linkage_matrix, cluster_threshold, criterion=criterion + ) - cluster_labels_npy_file_name = ( - csv_file_name_original - + track_arrays_array_names[track_arrays_array.index(track_arrays)] - + f"_{metric}_cluster_labels.npy" - ) - np.save(cluster_labels_npy_file_name, shape_dynamic_cluster_labels) + cluster_centroids = calculate_cluster_centroids( + clusterable_track_array, shape_dynamic_cluster_labels + ) + silhouette = silhouette_score( + clusterable_track_array, shape_dynamic_cluster_labels, metric=metric + ) + wcss_value = calculate_wcss( + clusterable_track_array, shape_dynamic_cluster_labels, cluster_centroids + ) - + silhouette_file_name = os.path.join( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + f"_silhouette_{metric}_{cluster_threshold}.npy" + ) + np.save(silhouette_file_name, silhouette) + + wcss_file_name = os.path.join( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + f"_wcss_{metric}_{cluster_threshold}.npy" + ) + np.save(wcss_file_name, wcss_value) + track_id_to_cluster = { + track_id: cluster_label + for track_id, cluster_label in zip( + analysis_track_ids, shape_dynamic_cluster_labels + ) + } + full_dataframe["Cluster"] = full_dataframe["Track ID"].map( + track_id_to_cluster + ) + result_dataframe = full_dataframe[ + ["Track ID", "t", "z", "y", "x", "Cluster"] + ] + csv_file_name = ( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + ".csv" + ) + + if os.path.exists(csv_file_name): + os.remove(csv_file_name) + result_dataframe.to_csv(csv_file_name, index=False) + + mean_matrix_file_name = ( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + f"_{metric}_covariance.npy" + ) + np.save(mean_matrix_file_name, track_arrays) + + linkage_npy_file_name = ( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + f"_{metric}_linkage.npy" + ) + np.save(linkage_npy_file_name, shape_dynamic_linkage_matrix) + + cluster_labels_npy_file_name = ( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + f"_{metric}_cluster_labels.npy" + ) + np.save(cluster_labels_npy_file_name, shape_dynamic_cluster_labels) def convert_tracks_to_arrays( @@ -1741,9 +1782,9 @@ def convert_tracks_to_arrays( cluster_threshold_shape=4, method="ward", criterion="maxclust", - starting_label_shape_dynamic = 0, - starting_label_dynamic = 0, - starting_label_shape = 0, + starting_label_shape_dynamic=0, + starting_label_dynamic=0, + starting_label_shape=0, ): analysis_track_ids = [] @@ -1779,30 +1820,35 @@ def convert_tracks_to_arrays( if min_length is not None else True ): - - covariance_shape_dynamic = compute_covariance_matrix(shape_dynamic_track_array) - - covariance_shape = compute_covariance_matrix( - shape_track_array - ) - - covariance_dynamic = compute_covariance_matrix( - dynamic_track_array + + covariance_shape_dynamic = compute_covariance_matrix( + shape_dynamic_track_array ) - if covariance_shape_dynamic is not None and covariance_shape is not None and covariance_dynamic is not None: + + covariance_shape = compute_covariance_matrix(shape_track_array) + + covariance_dynamic = compute_covariance_matrix(dynamic_track_array) + if ( + covariance_shape_dynamic is not None + and covariance_shape is not None + and covariance_dynamic is not None + ): ( - shape_dynamic_covariance, - shape_dynamic_eigenvectors, - ) = covariance_shape_dynamic - shape_covariance, shape_eigenvectors = covariance_shape + shape_dynamic_covariance, + shape_dynamic_eigenvectors, + ) = covariance_shape_dynamic + shape_covariance, shape_eigenvectors = covariance_shape dynamic_covaraince, dynamic_eigenvectors = covariance_dynamic shape_dynamic_eigenvectors_matrix.extend(shape_dynamic_eigenvectors) shape_eigenvectors_matrix.extend(shape_eigenvectors) dynamic_eigenvectors_matrix.extend(dynamic_eigenvectors) analysis_track_ids.append(track_id) - if len(shape_dynamic_eigenvectors_matrix) > 0 and len(dynamic_eigenvectors_matrix) > 0 and len(shape_eigenvectors_matrix) > 0: - - + if ( + len(shape_dynamic_eigenvectors_matrix) > 0 + and len(dynamic_eigenvectors_matrix) > 0 + and len(shape_eigenvectors_matrix) > 0 + ): + shape_dynamic_eigenvectors_3d = np.dstack(shape_dynamic_eigenvectors_matrix) shape_eigenvectors_3d = np.dstack(shape_eigenvectors_matrix) dynamic_eigenvectors_3d = np.dstack(dynamic_eigenvectors_matrix) @@ -1810,130 +1856,25 @@ def convert_tracks_to_arrays( shape_dynamic_eigenvectors_2d = shape_dynamic_eigenvectors_3d.reshape( len(analysis_track_ids), -1 ) - shape_eigenvectors_2d = shape_eigenvectors_3d.reshape(len(analysis_track_ids), -1) + shape_eigenvectors_2d = shape_eigenvectors_3d.reshape( + len(analysis_track_ids), -1 + ) dynamic_eigenvectors_2d = dynamic_eigenvectors_3d.reshape( len(analysis_track_ids), -1 ) - shape_dynamic_eigenvectors_1d = np.array(shape_dynamic_eigenvectors_2d) - shape_eigenvectors_1d = np.array(shape_eigenvectors_2d) - dynamic_eigenvectors_1d = np.array(dynamic_eigenvectors_2d) - - - shape_dynamic_cosine_distance = pdist(shape_dynamic_eigenvectors_1d, metric=metric) - - shape_dynamic_linkage_matrix = linkage(shape_dynamic_cosine_distance, method=method) - try: - shape_dynamic_cluster_labels = fcluster( - shape_dynamic_linkage_matrix, cluster_threshold_shape_dynamic, criterion=criterion - ) + starting_label_shape_dynamic - shape_dynamic_cluster_centroids = calculate_cluster_centroids( - shape_dynamic_eigenvectors_1d, shape_dynamic_cluster_labels - ) - shape_dynamic_silhouette = silhouette_score( - shape_dynamic_eigenvectors_1d, shape_dynamic_cluster_labels, metric=metric - ) - shape_dynamic_wcss_value = calculate_wcss( - shape_dynamic_eigenvectors_1d, shape_dynamic_cluster_labels, shape_dynamic_cluster_centroids - ) - except Exception as e: - print(e) - shape_dynamic_cluster_labels = fcluster( - shape_dynamic_linkage_matrix, 1, criterion='maxclust' - ) + starting_label_shape_dynamic - shape_dynamic_silhouette = np.nan - shape_dynamic_wcss_value = np.nan - - - - dynamic_cosine_distance = pdist(dynamic_eigenvectors_1d, metric=metric) - - dynamic_linkage_matrix = linkage(dynamic_cosine_distance, method=method) - try: - dynamic_cluster_labels = fcluster( - dynamic_linkage_matrix, cluster_threshold_dynamic, criterion=criterion - ) + starting_label_dynamic - dynamic_cluster_centroids = calculate_cluster_centroids( - dynamic_eigenvectors_1d, dynamic_cluster_labels - ) - dynamic_silhouette = silhouette_score( - dynamic_eigenvectors_1d, dynamic_cluster_labels, metric=metric - ) - dynamic_wcss_value = calculate_wcss( - dynamic_eigenvectors_1d, dynamic_cluster_labels, dynamic_cluster_centroids - ) - except: - dynamic_cluster_labels = fcluster( - dynamic_linkage_matrix, 1, criterion='maxclust' - ) + starting_label_dynamic - dynamic_silhouette = np.nan - dynamic_wcss_value = np.nan - - - - shape_cosine_distance = pdist(shape_eigenvectors_1d, metric=metric) - - shape_linkage_matrix = linkage(shape_cosine_distance, method=method) - try: - shape_cluster_labels = fcluster( - shape_linkage_matrix, cluster_threshold_shape, criterion=criterion - ) + starting_label_shape - shape_cluster_centroids = calculate_cluster_centroids( - shape_eigenvectors_1d, shape_cluster_labels - ) - shape_silhouette = silhouette_score( - shape_eigenvectors_1d, shape_cluster_labels, metric=metric - ) - shape_wcss_value = calculate_wcss( - shape_eigenvectors_1d, shape_cluster_labels, shape_cluster_centroids - ) - except: - shape_cluster_labels = fcluster( - shape_linkage_matrix, 1, criterion='maxclust' - ) + starting_label_shape - shape_silhouette = np.nan - shape_wcss_value = np.nan - - - - for track_id in analysis_track_ids: - shape_dynamic_cluster_labels_dict = { - track_id: cluster_label - for track_id, cluster_label in zip( - analysis_track_ids, shape_dynamic_cluster_labels - ) - } - shape_cluster_labels_dict = { - track_id: cluster_label - for track_id, cluster_label in zip(analysis_track_ids, shape_cluster_labels) - } - dynamic_cluster_labels_dict = { - track_id: cluster_label - for track_id, cluster_label in zip( - analysis_track_ids, dynamic_cluster_labels - ) - } + shape_dynamic_covariance_2d = np.array(shape_dynamic_eigenvectors_2d) + shape_covariance_2d = np.array(shape_eigenvectors_2d) + dynamic_covariance_2d = np.array(dynamic_eigenvectors_2d) return ( - shape_dynamic_eigenvectors_1d, - shape_eigenvectors_1d, - dynamic_eigenvectors_1d, - shape_dynamic_cluster_labels_dict, - shape_cluster_labels_dict, - dynamic_cluster_labels_dict, - shape_dynamic_linkage_matrix, - shape_linkage_matrix, - dynamic_linkage_matrix, - shape_dynamic_silhouette, - shape_dynamic_wcss_value, - shape_silhouette, - shape_wcss_value, - dynamic_silhouette, - dynamic_wcss_value, + shape_dynamic_covariance_2d, + shape_covariance_2d, + dynamic_covariance_2d, analysis_track_ids, ) - - + + def convert_tracks_to_simple_arrays( analysis_vectors, min_length=None, @@ -1943,10 +1884,10 @@ def convert_tracks_to_simple_arrays( cluster_threshold_shape=4, method="ward", criterion="maxclust", - starting_label_shape_dynamic = 0, - starting_label_dynamic = 0, - starting_label_shape = 0, - t_delta = 10 + starting_label_shape_dynamic=0, + starting_label_dynamic=0, + starting_label_shape=0, + t_delta=10, ): analysis_track_ids = [] @@ -1982,28 +1923,35 @@ def convert_tracks_to_simple_arrays( if min_length is not None else True ): - - covariance_shape_dynamic = compute_raw_matrix(shape_dynamic_track_array, t_delta=t_delta) - - covariance_shape = compute_raw_matrix( - shape_track_array, t_delta=t_delta + + covariance_shape_dynamic = compute_raw_matrix( + shape_dynamic_track_array, t_delta=t_delta ) - + + covariance_shape = compute_raw_matrix(shape_track_array, t_delta=t_delta) + covariance_dynamic = compute_raw_matrix( dynamic_track_array, t_delta=t_delta ) - if covariance_shape_dynamic is not None and covariance_shape is not None and covariance_dynamic is not None: - + if ( + covariance_shape_dynamic is not None + and covariance_shape is not None + and covariance_dynamic is not None + ): + shape_dynamic_eigenvectors = covariance_shape_dynamic - shape_eigenvectors = covariance_shape + shape_eigenvectors = covariance_shape dynamic_eigenvectors = covariance_dynamic shape_dynamic_eigenvectors_matrix.extend(shape_dynamic_eigenvectors) shape_eigenvectors_matrix.extend(shape_eigenvectors) dynamic_eigenvectors_matrix.extend(dynamic_eigenvectors) analysis_track_ids.append(track_id) - if len(shape_dynamic_eigenvectors_matrix) > 0 and len(dynamic_eigenvectors_matrix) > 0 and len(shape_eigenvectors_matrix) > 0: - - + if ( + len(shape_dynamic_eigenvectors_matrix) > 0 + and len(dynamic_eigenvectors_matrix) > 0 + and len(shape_eigenvectors_matrix) > 0 + ): + shape_dynamic_eigenvectors_3d = np.dstack(shape_dynamic_eigenvectors_matrix) shape_eigenvectors_3d = np.dstack(shape_eigenvectors_matrix) dynamic_eigenvectors_3d = np.dstack(dynamic_eigenvectors_matrix) @@ -2011,7 +1959,9 @@ def convert_tracks_to_simple_arrays( shape_dynamic_eigenvectors_2d = shape_dynamic_eigenvectors_3d.reshape( len(analysis_track_ids), -1 ) - shape_eigenvectors_2d = shape_eigenvectors_3d.reshape(len(analysis_track_ids), -1) + shape_eigenvectors_2d = shape_eigenvectors_3d.reshape( + len(analysis_track_ids), -1 + ) dynamic_eigenvectors_2d = dynamic_eigenvectors_3d.reshape( len(analysis_track_ids), -1 ) @@ -2020,82 +1970,103 @@ def convert_tracks_to_simple_arrays( shape_eigenvectors_1d = np.array(shape_eigenvectors_2d) dynamic_eigenvectors_1d = np.array(dynamic_eigenvectors_2d) + shape_dynamic_cosine_distance = pdist( + shape_dynamic_eigenvectors_1d, metric=metric + ) - shape_dynamic_cosine_distance = pdist(shape_dynamic_eigenvectors_1d, metric=metric) - - shape_dynamic_linkage_matrix = linkage(shape_dynamic_cosine_distance, method=method) + shape_dynamic_linkage_matrix = linkage( + shape_dynamic_cosine_distance, method=method + ) try: - shape_dynamic_cluster_labels = fcluster( - shape_dynamic_linkage_matrix, cluster_threshold_shape_dynamic, criterion=criterion - ) + starting_label_shape_dynamic + shape_dynamic_cluster_labels = ( + fcluster( + shape_dynamic_linkage_matrix, + cluster_threshold_shape_dynamic, + criterion=criterion, + ) + + starting_label_shape_dynamic + ) shape_dynamic_cluster_centroids = calculate_cluster_centroids( - shape_dynamic_eigenvectors_1d, shape_dynamic_cluster_labels - ) + shape_dynamic_eigenvectors_1d, shape_dynamic_cluster_labels + ) shape_dynamic_silhouette = silhouette_score( - shape_dynamic_eigenvectors_1d, shape_dynamic_cluster_labels, metric=metric + shape_dynamic_eigenvectors_1d, + shape_dynamic_cluster_labels, + metric=metric, ) shape_dynamic_wcss_value = calculate_wcss( - shape_dynamic_eigenvectors_1d, shape_dynamic_cluster_labels, shape_dynamic_cluster_centroids + shape_dynamic_eigenvectors_1d, + shape_dynamic_cluster_labels, + shape_dynamic_cluster_centroids, ) except Exception as e: print(e) - shape_dynamic_cluster_labels = fcluster( - shape_dynamic_linkage_matrix, 1, criterion='maxclust' - ) + starting_label_shape_dynamic + shape_dynamic_cluster_labels = ( + fcluster(shape_dynamic_linkage_matrix, 1, criterion="maxclust") + + starting_label_shape_dynamic + ) shape_dynamic_silhouette = np.nan shape_dynamic_wcss_value = np.nan - - dynamic_cosine_distance = pdist(dynamic_eigenvectors_1d, metric=metric) dynamic_linkage_matrix = linkage(dynamic_cosine_distance, method=method) try: - dynamic_cluster_labels = fcluster( - dynamic_linkage_matrix, cluster_threshold_dynamic, criterion=criterion - ) + starting_label_dynamic + dynamic_cluster_labels = ( + fcluster( + dynamic_linkage_matrix, + cluster_threshold_dynamic, + criterion=criterion, + ) + + starting_label_dynamic + ) dynamic_cluster_centroids = calculate_cluster_centroids( - dynamic_eigenvectors_1d, dynamic_cluster_labels - ) + dynamic_eigenvectors_1d, dynamic_cluster_labels + ) dynamic_silhouette = silhouette_score( - dynamic_eigenvectors_1d, dynamic_cluster_labels, metric=metric + dynamic_eigenvectors_1d, dynamic_cluster_labels, metric=metric ) dynamic_wcss_value = calculate_wcss( - dynamic_eigenvectors_1d, dynamic_cluster_labels, dynamic_cluster_centroids + dynamic_eigenvectors_1d, + dynamic_cluster_labels, + dynamic_cluster_centroids, + ) + except Exception as e: + print(f" Dynamic clustering error: {e} ") + dynamic_cluster_labels = ( + fcluster(dynamic_linkage_matrix, 1, criterion="maxclust") + + starting_label_dynamic ) - except: - dynamic_cluster_labels = fcluster( - dynamic_linkage_matrix, 1, criterion='maxclust' - ) + starting_label_dynamic dynamic_silhouette = np.nan - dynamic_wcss_value = np.nan - - + dynamic_wcss_value = np.nan shape_cosine_distance = pdist(shape_eigenvectors_1d, metric=metric) shape_linkage_matrix = linkage(shape_cosine_distance, method=method) try: - shape_cluster_labels = fcluster( - shape_linkage_matrix, cluster_threshold_shape, criterion=criterion - ) + starting_label_shape + shape_cluster_labels = ( + fcluster( + shape_linkage_matrix, cluster_threshold_shape, criterion=criterion + ) + + starting_label_shape + ) shape_cluster_centroids = calculate_cluster_centroids( - shape_eigenvectors_1d, shape_cluster_labels - ) + shape_eigenvectors_1d, shape_cluster_labels + ) shape_silhouette = silhouette_score( - shape_eigenvectors_1d, shape_cluster_labels, metric=metric + shape_eigenvectors_1d, shape_cluster_labels, metric=metric ) shape_wcss_value = calculate_wcss( shape_eigenvectors_1d, shape_cluster_labels, shape_cluster_centroids ) - except: - shape_cluster_labels = fcluster( - shape_linkage_matrix, 1, criterion='maxclust' - ) + starting_label_shape - shape_silhouette = np.nan - shape_wcss_value = np.nan - - + except Exception as e: + print(f" Shape clustering error: {e}") + shape_cluster_labels = ( + fcluster(shape_linkage_matrix, 1, criterion="maxclust") + + starting_label_shape + ) + shape_silhouette = np.nan + shape_wcss_value = np.nan for track_id in analysis_track_ids: shape_dynamic_cluster_labels_dict = { @@ -2106,7 +2077,9 @@ def convert_tracks_to_simple_arrays( } shape_cluster_labels_dict = { track_id: cluster_label - for track_id, cluster_label in zip(analysis_track_ids, shape_cluster_labels) + for track_id, cluster_label in zip( + analysis_track_ids, shape_cluster_labels + ) } dynamic_cluster_labels_dict = { track_id: cluster_label @@ -2133,13 +2106,12 @@ def convert_tracks_to_simple_arrays( dynamic_wcss_value, analysis_track_ids, ) - - + def compute_raw_matrix(track_arrays, t_delta): track_duration = track_arrays.shape[0] t_delta = int(t_delta) - + if track_duration < t_delta: repetitions = t_delta - track_duration last_row = track_arrays[-1, :] @@ -2149,30 +2121,31 @@ def compute_raw_matrix(track_arrays, t_delta): result_matrix = track_arrays[:t_delta, :] else: result_matrix = track_arrays - - + flattened_array = result_matrix.flatten() - - return flattened_array + + return flattened_array def compute_covariance_matrix(track_arrays): try: - covariance_matrix = np.cov(track_arrays, rowvar=False) - eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix) - eigenvalue_order = np.argsort(eigenvalues)[::-1] - eigenvalues = eigenvalues[eigenvalue_order] - eigenvectors = eigenvectors[:, eigenvalue_order] - normalized_eigenvectors = np.array([v / np.linalg.norm(v) for v in eigenvectors]) + covariance_matrix = np.cov(track_arrays, rowvar=False) + eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix) + eigenvalue_order = np.argsort(eigenvalues)[::-1] + eigenvalues = eigenvalues[eigenvalue_order] + eigenvectors = eigenvectors[:, eigenvalue_order] + normalized_eigenvectors = np.array( + [v / np.linalg.norm(v) for v in eigenvectors] + ) - real_part = np.real(normalized_eigenvectors) - imag_part = np.imag(normalized_eigenvectors) + real_part = np.real(normalized_eigenvectors) + imag_part = np.imag(normalized_eigenvectors) - concatenated_eigenvectors = np.concatenate((real_part, imag_part), axis=1) + concatenated_eigenvectors = np.concatenate((real_part, imag_part), axis=1) - return covariance_matrix, concatenated_eigenvectors + return covariance_matrix, concatenated_eigenvectors except Exception as e: - print(f'Covariance matric computation {e}') + print(f"Covariance matric computation {e}") class DenseLayer(nn.Module): diff --git a/src/napatrackmater/_version.py b/src/napatrackmater/_version.py index dead3bcf..a52e72b2 100644 --- a/src/napatrackmater/_version.py +++ b/src/napatrackmater/_version.py @@ -1,2 +1,2 @@ -__version__ = version = "4.7.3" -__version_tuple__ = version_tuple = (4, 7, 3) +__version__ = version = "4.7.4" +__version_tuple__ = version_tuple = (4, 7, 4)