diff --git a/docs/refinement_tree.ipynb b/docs/refinement_tree.ipynb index 1fe9ecb..b601413 100644 --- a/docs/refinement_tree.ipynb +++ b/docs/refinement_tree.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -69,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -108,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -172,35 +172,29 @@ "source": [ "## Feedback on performance and correctness\n", "\n", - "Currently (HEAD commit on `main` at August 30) `pytest tests/test_canonicalization.py::test_permutation_invariance` fails for 152 structures (exclusively timeout failures, no invariance failures):\n", + "Currently (HEAD commit on `main` at September 17) `pytest tests/test_canonicalization.py::test_permutation_invariance` fails for 104 structures (exclusively timeout failures, no invariance failures):\n", "\n", "```\n", - "=================================================== short test summary info ===================================================\n", + "====================================================== short test summary info ======================================================\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C28_H32_N8_Zn_B_F4_2_H2O_in_P21_c] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Paclitaxel] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C28_H36_N8_Zn_C_F3_S_O3_2_C4_H8_O_in_Pca21] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[14Ethyl14methylheptadecanoic acid] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[1_3_diphenylpropane] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Isohexan] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[18Methylnonadecanoic acid] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[Bicyclo[5.1.0]octa-1(7)-ene-8-one] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[q17_a14sadm_in_Pbca] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[tautomer1-amide] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[cyclophane2] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[tv027_0m_in_P2_1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[EMIM-BF4] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[ZnC20H34N6O4_in_P2_1_c] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[n17_a96-1_in_P2_1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C12_H24_Cl2_N6_Zn_in_C2_c] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[cyclophane1] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[Cortisol] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Mn2CO10] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Z 16 ethyl 15,17,17 trimethyloctadec9enoic acid] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[E 16,16,17,18 tetramethylnonadec9enoic acid] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[CHEMBL2348759] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C240] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[10tertButyl10isopropyltridecanoic acid] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[hexanitrohexaazaisowurtzitane] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[m18_a24alex_in_P2_1_2_1_2_1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C16_H22_N4_O6_S2_Zn_in_P2_1_c] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C21_H42_Cl_N10_Zn_Cl_in_P21_c] - Failed: Timeout >10.0s\n", @@ -209,41 +203,27 @@ "FAILED tests/test_canonicalization.py::test_permutation_invariance[C180] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Neohexan] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C60H60] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[cyclobutane] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C36_H72_Cl4_N14_Zn2_0_5_CH3CN_2_5_Et2O_in_P-1] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[cymantrene] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C18_H24_N4_O4_Zn_in_P2_1_c] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C24_H48_N12_Zn_C_F3_S_O3_2_in_P-1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[n16_a100_in_P2_1_c] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[CCDC2216842v3] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[3 Methylpentan] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[E 20 methyldocos13enoic acid] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[nPentan] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[ferrocene-CH3] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[[8]cycloparaphenylene] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[2-8-dimethyldecane] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[TEMPO] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C70] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C18_H22_N4_O4_Zn_in_P-1] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[FeCO5] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[2_6_dimethylnaphthalene] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[2_3_dimethylnaphthalene] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Os4CO16] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[C14_H16_Cl2_N4_Zn_in_P2_1_c] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[Isobutan] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[2_methyl_1_3_diphenylpropane] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[Ceftobiprol] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Streptomycin] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C60] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C14_H18_Cl2_N4_Zn_in_P2_1_c] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[tnt] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C15_H18_Cl2_N4_Zn_in_P2_1_n] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[tautomer2-enol] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[n17_a41sad_in_P2_1_n] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Oxo] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C21_H48_Cl_N10_Zn_S_O3_C_F3_in_P2_1_n] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Zn_C30_H42_N8_S2_O6_in_P2_1_c] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[1_3_methyl_phenyl_3_phenylpropane] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Neopentan] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[a15_a89a_in_P-1_New_I2_c] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[NH4PF6] - Failed: Timeout >10.0s\n", @@ -256,24 +236,12 @@ "FAILED tests/test_canonicalization.py::test_permutation_invariance[tv241_0m_in_P2_1_n] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[n16_a123_in_P2_1_2_1_2_1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[PH4PF6] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[FeMo_cluster] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[TTF_TCNE] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[3,7,11,15 tetramethylhexadecanoic acid] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[Fe2CO9] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[acetylacetonate] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[Isopentan] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[Rubpy3] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[dodecaborate] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[n17_b82sad_in_P2_1_n] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[1_8_dimethylnaphthalene] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[Satraplatin] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[FeBr2TMGasme] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Ir4CO12] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[dodecahedrane] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[m17_a35sad_in_P2_1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[2palmitoyloxy3 14,15,16-trimethylheptadecanoyloxy propyl stearate] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[cyclohexane] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[1_methyl_1_3_diphenylpropane] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[ro007_in_P2_1_2_1_2_1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[FeCl2TMG5NMe2asme] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[a15_a88c_in_P-1_New_P21_c] - Failed: Timeout >10.0s\n", @@ -283,43 +251,27 @@ "FAILED tests/test_canonicalization.py::test_permutation_invariance[Peroxo] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C19_H24_N4_O4_Zn_0_13_H2O_in_P2_1_n] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[2 4 Dimethylbutan] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[ferrocene] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[rv222_in_P2_1_2_1_2_1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[rv202_in_P2_1_c] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[9Z,12Z,26Z 35 methylheptatriaconta9,12,26trienoic acid] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[Pemetrexed] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Icosanoic acid] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[nButan] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[CHEMBL415840] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[cf3alkyne] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[SbH4PF6] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[6,15Dimethyloctadecanoic acid] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[FeCO3B4H8] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[glycerol tristearate] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[Petersen_graph] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[cyclopentane] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[a16_a07sada_in_P2_1_c] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C80] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[amino_acid_2] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[insulin] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[n16_a77sad1_in_P2_1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C20_H26_N4_O5_Zn_0_27_H2O_in_P2_1_n] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[chromocene-multi-attachment] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Fe3CO12] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[cisplatin] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[CrCO3benzene] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C18_H26_N4_Zn_Cl2_in_P2_1_2_1_2_1] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[1_5_dimethylnaphthalene] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[1_4_dimethylnaphthalene] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[hypercubane] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[rv006_in_P2_1_c] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[9Z,12Z,36E octatriaconta9,12,36trienoic acid] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[NiCOD2] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[C16_H30_N6_O4_Zn_in_P-1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Carboplatin] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[nHexan] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[CrCO6] - Failed: Timeout >10.0s\n", - "FAILED tests/test_canonicalization.py::test_permutation_invariance[2_7_dimethylnaphthalene] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[adamantane] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[a1140_in_P2_1_c] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[tv342_in_P1_New_P-1] - Failed: Timeout >10.0s\n", @@ -328,7 +280,7 @@ "FAILED tests/test_canonicalization.py::test_permutation_invariance[tv217_p1_in_P1_New_P-1] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[qv043_in_P2_1_New_Pca21] - Failed: Timeout >10.0s\n", "FAILED tests/test_canonicalization.py::test_permutation_invariance[Vancomycin] - Failed: Timeout >10.0s\n", - "========================================= 152 failed, 81 passed in 1659.66s (0:27:39) =========================================\n", + "=========================================== 104 failed, 129 passed in 1199.82s (0:19:59) ============================================\n", "\n", "```\n", "\n", diff --git a/tucan/canonicalization.py b/tucan/canonicalization.py index d2dbcef..d9c7062 100644 --- a/tucan/canonicalization.py +++ b/tucan/canonicalization.py @@ -68,12 +68,24 @@ def get_refinement_tree_node_children(m: nx.Graph) -> Generator[nx.Graph, None, def filter_out_automorphisms(ms: list[nx.Graph]) -> list[nx.Graph]: - # TODO: Make this more efficient. E.g., compare labelings as in `get_canonical_molecule()`? - node_matcher = nx.algorithms.isomorphism.categorical_node_match(PARTITION, 0) - filtered_ms = set(ms) - for m_i, m_j in combinations(ms, 2): - if nx.is_isomorphic(m_i, m_j, node_match=node_matcher): - filtered_ms.discard(m_j) + filtered_ms = set() + labelings = set() + + for m in ms: + m_relabeled_by_partition = nx.relabel_nodes( + m, + dict(zip(list(m), nx.get_node_attributes(m, PARTITION).values())), + copy=True, + ) + labeling = tuple( + sorted([tuple(sorted(edge)) for edge in m_relabeled_by_partition.edges()]) + ) + + if labeling in labelings: + continue + + labelings.add(labeling) + filtered_ms.add(m) return list(filtered_ms)