diff --git a/src/napatrackmater/Trackvector.py b/src/napatrackmater/Trackvector.py index 518602d5..7a052729 100644 --- a/src/napatrackmater/Trackvector.py +++ b/src/napatrackmater/Trackvector.py @@ -18,6 +18,7 @@ from sklearn.neighbors import KNeighborsClassifier from joblib import dump + class TrackVector(TrackMate): def __init__( self, @@ -562,11 +563,9 @@ def create_gt_analysis_vectors_dict(global_shape_dynamic_dataframe: pd.DataFrame gt_dataframe = track_data[ [ "Cluster", - ] ] - - + full_dataframe = track_data[ [ "Track ID", @@ -587,7 +586,7 @@ def create_gt_analysis_vectors_dict(global_shape_dynamic_dataframe: pd.DataFrame "Distance_Cell_mask", "Radial_Angle", "Cell_Axis_Mask", - "Cluster" + "Cluster", ] ] @@ -602,44 +601,54 @@ def create_gt_analysis_vectors_dict(global_shape_dynamic_dataframe: pd.DataFrame return gt_analysis_vectors + def create_global_gt_dataframe( - - full_dataframe, - ground_truth_csv_file, - calibration_z, - calibration_y, - calibration_x, - time_veto_threshold=0.0, space_veto_threshold=5.0, cell_type_key = "Celltype_label" + full_dataframe, + ground_truth_csv_file, + calibration_z, + calibration_y, + calibration_x, + time_veto_threshold=0.0, + space_veto_threshold=5.0, + cell_type_key="Celltype_label", ): - - ground_truth_data_frame = pd.read_csv(ground_truth_csv_file) - ground_truth_data_frame.dropna(subset=[cell_type_key], inplace=True) - - # Prepare ground truth tuples and labels - ground_truth_tuples = np.array([ - ground_truth_data_frame['FRAME'].values, - ground_truth_data_frame['POSITION_Z'].values / calibration_z, - ground_truth_data_frame['POSITION_Y'].values / calibration_y, - ground_truth_data_frame['POSITION_X'].values / calibration_x, - ]).T - ground_truth_labels = ground_truth_data_frame[cell_type_key].values - theory_tuples_spatial = full_dataframe[["t", "z", "y", "x"]].values - - tree_spatial = cKDTree(theory_tuples_spatial) - - # Initialize arrays to store the indices of the closest theory tuples and their corresponding ground truth labels - closest_theory_indices = [] - corresponding_ground_truth_labels = [] - closest_theory_tuples_found = [] - closest_theory_track_ids = [] - # Find the closest theory tuple for each ground truth tuple - for i, (ground_tuple, ground_label) in enumerate(zip(ground_truth_tuples, ground_truth_labels)): + + ground_truth_data_frame = pd.read_csv(ground_truth_csv_file) + ground_truth_data_frame.dropna(subset=[cell_type_key], inplace=True) + + # Prepare ground truth tuples and labels + ground_truth_tuples = np.array( + [ + ground_truth_data_frame["FRAME"].values, + ground_truth_data_frame["POSITION_Z"].values / calibration_z, + ground_truth_data_frame["POSITION_Y"].values / calibration_y, + ground_truth_data_frame["POSITION_X"].values / calibration_x, + ] + ).T + ground_truth_labels = ground_truth_data_frame[cell_type_key].values + theory_tuples_spatial = full_dataframe[["t", "z", "y", "x"]].values + + tree_spatial = cKDTree(theory_tuples_spatial) + + # Initialize arrays to store the indices of the closest theory tuples and their corresponding ground truth labels + closest_theory_indices = [] + corresponding_ground_truth_labels = [] + closest_theory_tuples_found = [] + closest_theory_track_ids = [] + # Find the closest theory tuple for each ground truth tuple + for i, (ground_tuple, ground_label) in enumerate( + zip(ground_truth_tuples, ground_truth_labels) + ): # Use the KD-Tree for nearest-neighbor search - spatial_valid_indices = tree_spatial.query_ball_point(ground_tuple, space_veto_threshold, p=2) + spatial_valid_indices = tree_spatial.query_ball_point( + ground_tuple, space_veto_threshold, p=2 + ) if spatial_valid_indices: # Find the closest theory index within the common indices - closest_theory_index = spatial_valid_indices[np.argmin(tree_spatial.query(ground_tuple)[0])] + closest_theory_index = spatial_valid_indices[ + np.argmin(tree_spatial.query(ground_tuple)[0]) + ] # Get the closest theory tuple closest_theory_tuple = full_dataframe.loc[closest_theory_index] @@ -647,38 +656,39 @@ def create_global_gt_dataframe( # Check if the index is valid, within the DataFrame's range, and satisfies the time veto if ( 0 <= closest_theory_index < len(full_dataframe) - and abs(closest_theory_tuple['t'] - ground_tuple[0]) <= time_veto_threshold + and abs(closest_theory_tuple["t"] - ground_tuple[0]) + <= time_veto_threshold ): closest_theory_indices.append(closest_theory_index) corresponding_ground_truth_labels.append(ground_label) closest_theory_tuples_found.append(closest_theory_tuple) - closest_theory_track_ids.append(closest_theory_tuple['Track ID']) + closest_theory_track_ids.append(closest_theory_tuple["Track ID"]) + + track_id_to_cluster = { + track_id: cluster_label + for track_id, cluster_label in zip( + closest_theory_track_ids, corresponding_ground_truth_labels + ) + } + full_dataframe["Cluster"] = full_dataframe["Track ID"].map(track_id_to_cluster) + + return full_dataframe - track_id_to_cluster = { - track_id: cluster_label - for track_id, cluster_label in zip( - closest_theory_track_ids, corresponding_ground_truth_labels - ) - } - full_dataframe["Cluster"] = full_dataframe["Track ID"].map(track_id_to_cluster) - - return full_dataframe def supervised_clustering( - csv_file_name, - gt_analysis_vectors, - num_clusters, + csv_file_name, + gt_analysis_vectors, + num_clusters, ): - csv_file_name_original = csv_file_name + '_training_data' + csv_file_name_original = csv_file_name + "_training_data" data_list = [] - for track_id, ( shape_dynamic_dataframe_list, gt_dataframe_list, full_dataframe_list, ) in gt_analysis_vectors.items(): - + shape_dynamic_track_array = np.array( [ [item for item in record.values()] @@ -688,36 +698,39 @@ def supervised_clustering( gt_track_array = np.array( [[item for item in record.values()] for record in gt_dataframe_list] - ) if not np.isnan(gt_track_array[0]) and shape_dynamic_track_array.shape[0] > 1: - print(gt_track_array[0][0], shape_dynamic_track_array.shape) ( shape_dynamic_covariance, shape_dynamic_eigenvectors, ) = compute_covariance_matrix(shape_dynamic_track_array) flattened_covariance = shape_dynamic_covariance.flatten() - data_list.append({ - 'Flattened_Covariance': flattened_covariance, - 'gt_label': gt_track_array[0][0] - }) + data_list.append( + { + "Flattened_Covariance": flattened_covariance, + "gt_label": gt_track_array[0][0], + } + ) result_dataframe = pd.DataFrame(data_list) if os.path.exists(csv_file_name_original): - os.remove(csv_file_name_original) + os.remove(csv_file_name_original) result_dataframe.to_csv(csv_file_name_original, index=False) - X = np.vstack(result_dataframe['Flattened_Covariance'].values) - y = result_dataframe['gt_label'].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.01, random_state=42) + X = np.vstack(result_dataframe["Flattened_Covariance"].values) + y = result_dataframe["gt_label"].values + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.01, random_state=42 + ) knn = KNeighborsClassifier(n_neighbors=num_clusters) knn.fit(X_train, y_train) accuracy = knn.score(X_test, y_test) print(f"Model Accuracy: {accuracy:.2f}") - model_filename = 'knn_model.joblib' + model_filename = "knn_model.joblib" dump(knn, model_filename) - return knn + return knn + def unsupervised_clustering( full_dataframe, diff --git a/src/napatrackmater/_version.py b/src/napatrackmater/_version.py index c142045d..e6a5fdba 100644 --- a/src/napatrackmater/_version.py +++ b/src/napatrackmater/_version.py @@ -1,2 +1,2 @@ -__version__ = version = "4.3.9" -__version_tuple__ = version_tuple = (4, 3, 9) +__version__ = version = "4.4.0" +__version_tuple__ = version_tuple = (4, 4, 0)