Merge pull request #10 from samblau/sam_dev

Sam dev
BlauGroup · Apr 12, 2023 · 62d5f2e · 62d5f2e
2 parents cec0936 + 41775f6
commit 62d5f2e
Show file tree

Hide file tree

Showing 123 changed files with 18,506 additions and 134 deletions.
diff --git a/HiPRGen/gen_euvl_testset.py b/HiPRGen/gen_euvl_testset.py
@@ -0,0 +1,72 @@
+print("Importing required files...")
+from pymatgen.core.structure import Molecule
+from pymatgen.analysis.graphs import MoleculeGraph
+from pymatgen.analysis.local_env import OpenBabelNN
+from monty.serialization import loadfn, dumpfn
+from mol_entry import MoleculeEntry
+from initial_state import find_mol_entry_by_entry_id
+import copy
+print("Done!")
+
+#list of mol_ids for species you want to find with each mol_id occupying its own line. Must have an empty line
+#at the end of the file, but empty lines will not be copied.
+print("Opening and reading species id file...")
+with open('atom_mapping_set.txt') as mol_id_doc: #open the text file and copy each line as the element of a list, removing '\n'
+ mol_id_list = []
+ for line in mol_id_doc:
+ if line.strip(): #excludes empty lines
+ mol_id_list.append(line[0:len(line)-1])
+print("Done!")
+
+print("Opening json file...")
+mol_json = "032923.json"
+database_entries = loadfn(mol_json) #loads the json file to a list of dictionaries
+print("Done!")
+
+print("Reading molecule entries from json file...") #converts dictionary from loaded json file to a list
+mol_entries= [MoleculeEntry.from_mp_doc(e) for e in database_entries]
+print("Done!")
+
+final_mol_ids = []
+
+print('Copying desired molecules to a new list...')
+for mol_id in mol_id_list: #takes the list of ids, finds their corresponding molecules in the json file 
+ for mol_entry in mol_entries:
+ m_id = mol_entry.entry_id
+ if m_id == mol_id:
+ graph = mol_entry.mol_graph
+ for entry in mol_entries: #(as well as differently charged variants) and adds them to the list
+ molecule_graph = entry.mol_graph #tests if other charges found
+ if molecule_graph.isomorphic_to(graph):
+ final_mol_ids.append(entry.entry_id)
+
+to_remove = ['nbo', 'alpha', 'beta']
+
+test_set = []
+for entry in database_entries:
+ if entry['molecule_id'] in final_mol_ids:
+ new_entry = copy.deepcopy(entry)
+ for name in entry.keys():
+ for remove in to_remove:
+ if remove in name:
+ val = new_entry.pop(name)
+ test_set.append(new_entry)
+
+dumpfn(test_set, 'atom_mapping_set.json') 
+
+new_mol_json = loadfn('atom_mapping_set.json')
+new_mol_entries= [MoleculeEntry.from_mp_doc(e) for e in new_mol_json]
+
+new_id_list = []
+
+for molecule in new_mol_entries:
+ new_id_list.append(molecule.entry_id)
+
+new_id_set = set(new_id_list)
+mol_id_set = set(mol_id_list)
+differences = mol_id_set - new_id_set
+if differences == set():
+ print('All desired entries successfully copied')
+else:
+ for elem in differences:
+ print('entry ', elem, ' not copied successfully')
diff --git a/HiPRGen/mc_analysis.py b/HiPRGen/mc_analysis.py
@@ -1115,8 +1115,8 @@ def sink_filter(self, species_index):
  mol = self.network_loader.mol_entries[species_index]
  if (number_of_consuming_reactions + number_of_producing_reactions > 0 and
  ratio > 1.5 and
- expected_value > 0.1 and
- mol.spin_multiplicity == 1):
+ expected_value > 0.1):# and
+ #mol.spin_multiplicity == 1):
  return True
  else:
  return False

diff --git a/HiPRGen/reaction_filter.py b/HiPRGen/reaction_filter.py
@@ -15,7 +15,7 @@
 )
 
 """
-Phases 3 & 4 run in paralell using MPI
+Phases 3 & 4 run in parallel using MPI
 
 Phase 3: reaction gen and filtering
 input: a bucket labeled by atom count