From 3fa3980e40545b9041c25aec76860d35a55052cb Mon Sep 17 00:00:00 2001 From: kapoorlab Date: Mon, 25 Sep 2023 22:37:48 +0200 Subject: [PATCH] save features as csv --- src/napatrackmater/Trackvector.py | 41 +++++++++++++++++++++++++++++++ src/napatrackmater/_version.py | 4 +-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/napatrackmater/Trackvector.py b/src/napatrackmater/Trackvector.py index dc3b26dc..f4630033 100644 --- a/src/napatrackmater/Trackvector.py +++ b/src/napatrackmater/Trackvector.py @@ -11,6 +11,7 @@ from scipy.cluster.hierarchy import linkage, fcluster from sklearn.ensemble import RandomForestClassifier from sklearn.cluster import KMeans +import csv class TrackVector(TrackMate): @@ -672,6 +673,15 @@ def perform_cosine_similarity( ) sorted_feature_names, sorted_importances = zip(*sorted_features) print("Sorted Feature Importances:") + _save_feature_importance( + sorted_feature_names, + normalized_importances, + csv_file_name_original, + track_arrays_array_names, + track_arrays_array, + track_arrays, + ) + for feature, importance in zip(sorted_feature_names, normalized_importances): print(f"{feature}: {importance}") @@ -694,6 +704,28 @@ def perform_cosine_similarity( result_dataframe.to_csv(csv_file_name, index=False) +def _save_feature_importance( + sorted_feature_names, + normalized_importances, + csv_file_name_original, + track_arrays_array_names, + track_arrays_array, + track_arrays, +): + data = list(zip(sorted_feature_names, normalized_importances)) + csv_file_name = ( + csv_file_name_original + + track_arrays_array_names[track_arrays_array.index(track_arrays)] + + "_feature_importance" + + ".csv" + ) + with open(csv_file_name, mode="w", newline="") as csv_file: + writer = csv.writer(csv_file) + writer.writerow(["Feature", "Importance"]) + for feature, importance in data: + writer.writerow([feature, importance]) + + def _perform_pca_clustering(track_arrays, num_clusters, num_components=3): pca = PCA(n_components=num_components) reduced_data = pca.fit_transform(track_arrays) @@ -794,8 +826,17 @@ def perform_pca( ) sorted_feature_names, sorted_importances = zip(*sorted_features) print("Sorted Feature Importances:") + _save_feature_importance( + sorted_feature_names, + normalized_importances, + csv_file_name_original, + track_arrays_array_names, + track_arrays_array, + track_arrays, + ) for feature, importance in zip(sorted_feature_names, normalized_importances): print(f"{feature}: {importance}") + print(f"filtered tracks {len(filtered_track_ids), len(cluster_labels)}") track_id_to_cluster = { track_id: cluster_label diff --git a/src/napatrackmater/_version.py b/src/napatrackmater/_version.py index c2da1d2b..9e0ec7b9 100644 --- a/src/napatrackmater/_version.py +++ b/src/napatrackmater/_version.py @@ -1,2 +1,2 @@ -__version__ = version = "4.2.5" -__version_tuple__ = version_tuple = (4, 2, 5) +__version__ = version = "4.2.6" +__version_tuple__ = version_tuple = (4, 2, 6)