Skip to content

Commit

Permalink
Merge pull request #10 from RECETOX/hechth/issue8
Browse files Browse the repository at this point in the history
Add matchms with 0 scores into the output structure
  • Loading branch information
hechth authored Oct 31, 2023
2 parents 02d0c18 + 2a1fbf5 commit 616ca13
Show file tree
Hide file tree
Showing 8 changed files with 247,935 additions and 303,850 deletions.
111 changes: 111 additions & 0 deletions analysis/Python_scripts/add_zeros.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from matchms.importing import load_from_msp\n",
"from matchms.logging_functions import set_matchms_logger_level\n",
"import itertools\n",
"\n",
"set_matchms_logger_level(\"ERROR\")"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"predicted_spectra = list(load_from_msp(\"../data/filtered/simulated_matchms_filter_1%I_all_peaks.msp\"))\n",
"predicted_spectra_metadata= pd.DataFrame.from_dict([x.metadata for x in predicted_spectra])\n",
"predicted_spectra_metadata.rename(columns={'compound_name': 'query'}, inplace=True)\n",
"predicted_spectra_names = predicted_spectra_metadata['query'].to_list()"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"experimental_spectra = list(load_from_msp(\"../data/experimental/RECETOX_GC-EI_MS_20201028.msp\"))\n",
"experimental_spectra_metadata= pd.DataFrame.from_dict([x.metadata for x in experimental_spectra])\n",
"experimental_spectra_metadata.rename(columns={'compound_name': 'reference'}, inplace=True)\n",
"experimental_spectra_names = experimental_spectra_metadata['reference'].to_list()"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"all_combinations = set(itertools.product(predicted_spectra_names, experimental_spectra_names))"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"matches = pd.read_csv(\"../data/output_matching/matchms/matchms_tol_0.01_1%I_top5.tsv\", sep=\"\\t\")\n",
"matches.set_index(['query', 'reference'], inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"existing_rows = set(matches.index.to_list())\n",
"rows_to_add = list(all_combinations - existing_rows)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"all_zeros = pd.DataFrame(0, index=rows_to_add, columns=matches.columns)\n",
"merged = pd.concat([matches, all_zeros]).reset_index(names=['query', 'reference'])"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"merged.to_csv(\"../data/output_matching/matchms/matchms_tol_0.01_1%I_top5_with_0s.tsv\", sep=\"\\t\", index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file modified analysis/Python_scripts/chemical_composition_boxplot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1,072 changes: 8 additions & 1,064 deletions analysis/Python_scripts/scatterplot.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions analysis/Python_scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ def append_classes(df, left_on):
"S": [has_atom(m, 'S') for m in molecules],
"P": [has_atom(m, 'P') for m in molecules],
"Si": [has_atom(m, 'Si') for m in molecules],
"C,O,N,H": [has_organic_atoms(m) for m in molecules],
# "C,O,N,H": [has_organic_atoms(m) for m in molecules],
})
merged_df = pd.merge(df, class_names, left_on=left_on, right_on='molname')
return merged_df

# Define a function to map the true columns to a list of names
def get_true_names(row, df):
return [col for col in df.columns[11:18] if row[col]]
return [col for col in df.columns[11:17] if row[col]]
Loading

0 comments on commit 616ca13

Please sign in to comment.