Skip to content

Commit

Permalink
small refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
hechth committed Jul 10, 2024
1 parent 5872db6 commit e564efe
Show file tree
Hide file tree
Showing 7 changed files with 18 additions and 30 deletions.
7 changes: 2 additions & 5 deletions analysis/Python_scripts/Fig3_correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@
from utils import *
from plotting import *

matchms_scores = pd.read_csv("../data/output_matching/matchms/matchms_tol_0.0035_1%I_all_peaks_with_0s_only_matching.tsv", sep="\t")
matchms_scores.rename(columns={'CosineHungarian_0.0035_0.0_1.0_scores': 'scores'}, inplace=True)
matchms_scores.rename(columns={'CosineHungarian_0.0035_0.0_1.0_matches': 'matches'}, inplace=True)
matchms_scores = load_matchms_scores()

matchms_scores = append_classes(matchms_scores, 'query')
df = normalize_df(append_spectrum_metadata(matchms_scores), matches_norm_col=None)
df = normalize_df(matchms_scores, matches_norm_col=None)
del df['peak_comments']

matches_col = 'matches'
Expand Down
6 changes: 1 addition & 5 deletions analysis/Python_scripts/Fig4_superclass_histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@
from utils import *
from plotting import *

matchms_scores = pd.read_csv("../data/output_matching/matchms/matchms_tol_0.0035_1%I_all_peaks_with_0s_only_matching.tsv", sep="\t")
matchms_scores.rename(columns={'CosineHungarian_0.0035_0.0_1.0_scores': 'scores'}, inplace=True)
matchms_scores.rename(columns={'CosineHungarian_0.0035_0.0_1.0_matches': 'matches'}, inplace=True)
matchms_scores = append_classes(matchms_scores, 'query')
matchms_scores = append_spectrum_metadata(matchms_scores)
matchms_scores = load_matchms_scores()

matchms_scores_superclass = preprocess_data(normalize_df(matchms_scores.copy()), ["superclass"])
larger_superclasses = matchms_scores_superclass.groupby("superclass").filter(lambda x: len(x) > 2)
Expand Down
6 changes: 1 addition & 5 deletions analysis/Python_scripts/Fig5_classes_boxplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,7 @@
from plotting import *


matchms_scores = pd.read_csv("../data/output_matching/matchms/matchms_tol_0.0035_1%I_all_peaks_with_0s_only_matching.tsv", sep="\t")
matchms_scores.rename(columns={'CosineHungarian_0.0035_0.0_1.0_scores': 'scores'}, inplace=True)
matchms_scores.rename(columns={'CosineHungarian_0.0035_0.0_1.0_matches': 'matches'}, inplace=True)
matchms_scores = append_classes(matchms_scores, 'query')
matchms_scores = append_spectrum_metadata(matchms_scores)
matchms_scores = load_matchms_scores()
merged = normalize_df(matchms_scores.copy())

scores_preprocessed_hierarchy = preprocess_data(merged, ["superclass", "class", "subclass"])
Expand Down
6 changes: 1 addition & 5 deletions analysis/Python_scripts/Fig6_benzene_subclasses_boxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@
from plotting import *


matchms_scores = pd.read_csv("../data/output_matching/matchms/matchms_tol_0.0035_1%I_all_peaks_with_0s_only_matching.tsv", sep="\t")
matchms_scores.rename(columns={'CosineHungarian_0.0035_0.0_1.0_scores': 'scores'}, inplace=True)
matchms_scores.rename(columns={'CosineHungarian_0.0035_0.0_1.0_matches': 'matches'}, inplace=True)
matchms_scores = append_classes(matchms_scores, 'query')
matchms_scores = append_spectrum_metadata(matchms_scores)
matchms_scores = load_matchms_scores()
merged = normalize_df(matchms_scores.copy())

scores_preprocessed_hierarchy = preprocess_data(merged, ["superclass", "class", "subclass"])
Expand Down
7 changes: 2 additions & 5 deletions analysis/Python_scripts/Fig7_nitrogen_comparison.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
from utils import *
from plotting import boxplot_comparison

all_peaks_same = pd.read_csv("../data/output_matching/matchms/matchms_tol_0.0035_1%I_all_peaks_with_0s_only_matching.tsv", sep='\t')
all_peaks_same.rename(columns={'CosineHungarian_0.0035_0.0_1.0_scores': 'scores'}, inplace=True)
all_peaks_same.rename(columns={'CosineHungarian_0.0035_0.0_1.0_matches': 'matches'}, inplace=True)
all_peaks_same = append_spectrum_metadata(all_peaks_same)
merged_all_peaks_same = normalize_df(append_classes(all_peaks_same, 'query'))
matchms_scores = load_matchms_scores()
merged_all_peaks_same = normalize_df(matchms_scores)
mdf_comp = preprocess_data(merged_all_peaks_same, ["composition"])

baseline_cols= ['C,H', 'C,H,O', 'C,H,O,S', 'C,Cl,H,O', 'Br,C,H,O', 'C,Cl,H', 'C,Cl,H,O,S', 'C,Cl,F,H,O', 'C,H,O,P', 'C,H,O,P,S']
Expand Down
7 changes: 2 additions & 5 deletions analysis/Python_scripts/Fig8_p_and_s.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
from utils import *
from plotting import create_plot

all_peaks_same = pd.read_csv("../data/output_matching/matchms/matchms_tol_0.0035_1%I_all_peaks_with_0s_only_matching.tsv", sep='\t')
all_peaks_same.rename(columns={'CosineHungarian_0.0035_0.0_1.0_scores': 'scores'}, inplace=True)
all_peaks_same.rename(columns={'CosineHungarian_0.0035_0.0_1.0_matches': 'matches'}, inplace=True)
all_peaks_same = append_spectrum_metadata(all_peaks_same)
merged_all_peaks_same = normalize_df(append_classes(all_peaks_same, 'query'))
matchms_scores = load_matchms_scores()
merged_all_peaks_same = normalize_df(matchms_scores)
mdf_comp = preprocess_data(merged_all_peaks_same, ["composition"])

mdf_comp_ps = mdf_comp[mdf_comp['composition'].str.contains('S|P')]
Expand Down
9 changes: 9 additions & 0 deletions analysis/Python_scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,3 +332,12 @@ def normalize_df(df: pd.DataFrame, use_nist: bool = True, matches_norm_col: str
if matches_norm_col:
df[matches_col] = (df[matches_col] / df[matches_norm_col]) * 100
return df


def load_matchms_scores(
    path: str = "../data/output_matching/matchms/matchms_tol_0.0035_1%I_all_peaks_with_0s_only_matching.tsv",
) -> pd.DataFrame:
    """Load the matchms matching results shared by the figure scripts.

    Reads the tab-separated file at ``path``, renames the two
    ``CosineHungarian_0.0035_0.0_1.0_*`` columns to the short names
    ``scores`` and ``matches``, and then passes the table through
    ``append_classes(..., 'query')`` and ``append_spectrum_metadata(...)``.

    Args:
        path: Location of the matchms TSV output. The default is a relative
            path, so it is resolved against the current working directory;
            callers that run from elsewhere can pass an explicit path.

    Returns:
        The table returned by ``append_spectrum_metadata`` (presumably the
        scores DataFrame enriched with class and spectrum metadata columns --
        confirm against those helpers).
    """
    matchms_scores = pd.read_csv(path, sep="\t")
    # One rename call with a combined mapping replaces two separate in-place
    # renames; the resulting column names are identical.
    matchms_scores = matchms_scores.rename(columns={
        'CosineHungarian_0.0035_0.0_1.0_scores': 'scores',
        'CosineHungarian_0.0035_0.0_1.0_matches': 'matches',
    })
    matchms_scores = append_classes(matchms_scores, 'query')
    return append_spectrum_metadata(matchms_scores)

0 comments on commit e564efe

Please sign in to comment.