diff --git a/examples/6qwn-assembly1.cif.gz b/examples/6qwn-assembly1.cif.gz new file mode 100644 index 0000000..ea1a8cd Binary files /dev/null and b/examples/6qwn-assembly1.cif.gz differ diff --git a/examples/6qwn-assembly2.cif.gz b/examples/6qwn-assembly2.cif.gz new file mode 100644 index 0000000..5e289de Binary files /dev/null and b/examples/6qwn-assembly2.cif.gz differ diff --git a/run_test.sh b/run_test.sh index a84aba5..78cdf6c 100644 --- a/run_test.sh +++ b/run_test.sh @@ -9,15 +9,10 @@ else binary="DockQ" fi -# Test that cython version behaves the same as nocython $binary examples/1A2K_r_l_b.model.pdb examples/1A2K_r_l_b.pdb > test diff test testdata/1A2K.dockq -python src/DockQ/DockQ.py examples/1A2K_r_l_b.model.pdb examples/1A2K_r_l_b.pdb > test -diff test testdata/1A2K.dockq $binary examples/1A2K_r_l_b.model.pdb examples/1A2K_r_l_b.pdb --no_align > test diff test testdata/1A2K.dockq -python src/DockQ/DockQ.py examples/1A2K_r_l_b.model.pdb examples/1A2K_r_l_b.pdb --no_align > test -diff test testdata/1A2K.dockq # Multiple interfaces $binary examples/dimer_dimer.model.pdb examples/dimer_dimer.pdb --short > test @@ -40,3 +35,13 @@ $binary examples/1EXB_r_l_b.model.pdb examples/1EXB_r_l_b.pdb --mapping DH:AE > diff test testdata/1EXB_DH.AE.dockq $binary examples/1EXB_r_l_b.model.pdb examples/1EXB.cif.gz --mapping DH:AE > test diff test testdata/1EXB_DH.AE_cif.dockq + +# Peptide measures +$binary examples/6qwn-assembly1.cif.gz examples/6qwn-assembly2.cif.gz --capri_peptide > test +diff test testdata/6q2n_peptide.dockq + +# Test that cython version behaves the same as nocython +python src/DockQ/DockQ.py examples/1A2K_r_l_b.model.pdb examples/1A2K_r_l_b.pdb > test +diff test testdata/1A2K.dockq +python src/DockQ/DockQ.py examples/1A2K_r_l_b.model.pdb examples/1A2K_r_l_b.pdb --no_align > test +diff test testdata/1A2K.dockq diff --git a/src/DockQ/DockQ.py b/src/DockQ/DockQ.py index 1bc0a08..a231bef 100755 --- a/src/DockQ/DockQ.py +++ b/src/DockQ/DockQ.py @@ -120,24 +120,6 @@ def get_aligned_residues(chainA, chainB, alignment): return tuple(aligned_resA), tuple(aligned_resB) -@lru_cache -def list_atoms_per_residue2(chain, what): - n_atoms_per_residue = [] - atoms_ids = [] - for residue in chain: - # important to remove duplicate atoms (e.g. alternates) at this stage - atom_ids = tuple([a.id for a in residue.get_unpacked_list()]) - atoms_ids.append(atom_ids) - n_atoms_per_residue.append(len(set(atom_ids))) - return tuple(n_atoms_per_residue), tuple(atoms_ids) - - -@lru_cache -def get_atoms(chain, what): - atoms = np.array([atom.get_coord() for res in chain for atom in res.get_atoms()]) - return atoms - - @lru_cache def get_residue_distances(chain1, chain2, what, all_atom=True): if all_atom: @@ -174,148 +156,6 @@ def get_residue_distances(chain1, chain2, what, all_atom=True): return model_res_distances -@lru_cache -def get_aligned_atoms(n_atoms_per_res_chainA, n_atoms_per_res_chainB, alignment): - - if alignment[0] == alignment[2]: # no need to align - return ( - np.ones(sum(list(n_atoms_per_res_chainA))).astype(bool), - np.ones(sum(list(n_atoms_per_res_chainB))).astype(bool), - [i for i in range(len(alignment[0]))], - [i for i in range(len(alignment[1]))], - ) - atom_index_A = np.zeros(sum(list(n_atoms_per_res_chainA))).astype(bool) - atom_index_B = np.zeros(sum(list(n_atoms_per_res_chainB))).astype(bool) - res_index_A = [] - res_index_B = [] - - n_atoms_per_res_chainA = iter(n_atoms_per_res_chainA) - n_atoms_per_res_chainB = iter(n_atoms_per_res_chainB) - countA = iA = 0 - countB = iB = 0 - - for A, match, B in zip(*alignment): - if A != "-": - nA = next(n_atoms_per_res_chainA) - countA += nA - iA += 1 - if B != "-": - nB = next(n_atoms_per_res_chainB) - countB += nB - iB += 1 - - if match == "|": - atom_index_A[countA - nA : countA] = 1 - atom_index_B[countB - nB : countB] = 1 - res_index_A.append(iA - 1) - res_index_B.append(iB - 1) - - return atom_index_A, atom_index_B, res_index_A, res_index_B - - -class NpWrapper: - def __init__(self, x: np.array) -> None: - self.values = x - # here you can use your own hashing function - self.h = hashlib.sha224(x.view()).hexdigest() - - def __hash__(self) -> int: - return hash(self.h) - - def __eq__(self, __value: object) -> bool: - return __value.h == self.h - - -def dist_memoizer(get_atom_distances): - @lru_cache() - def cached_wrapper(np1, np2, n_atoms, what): - return get_atom_distances(np1.values, np2.values, n_atoms, what) - - @wraps(get_atom_distances) - def wrapper(x: np.array, y: np.array, n_atoms, what): - np1 = NpWrapper(x) - np2 = NpWrapper(y) - return cached_wrapper(np1, np2, n_atoms, what) - - return wrapper - - -@dist_memoizer -def get_atom_distances(chain1_atoms, chain2_atoms, n_atoms_per_res, what): - n_atoms_per_res_chain1, n_atoms_per_res_chain2 = n_atoms_per_res - - model_res_distances = residue_distances( - chain1_atoms, - chain2_atoms, - np.array(n_atoms_per_res_chain1).astype(int), - np.array(n_atoms_per_res_chain2).astype(int), - ) - return model_res_distances - - -def sup_memoizer(superimpose): - @lru_cache() - def cached_wrapper(np1, np2): - return superimpose(np1.values, np2.values) - - @wraps(superimpose) - def wrapper(x: np.array, y: np.array): - np1 = NpWrapper(x) - np2 = NpWrapper(y) - return cached_wrapper(np1, np2) - - return wrapper - - -@sup_memoizer -def superimpose(from_atoms, to_atoms): - super_imposer = SVDSuperimposer() - # Set to align on receptor - super_imposer.set(to_atoms, from_atoms) - super_imposer.run() - rot, tran = super_imposer.get_rotran() - return rot, tran, super_imposer - - -@lru_cache -def filter_atoms(model_info, native_info, filter=("CA", "C", "N", "O", "P")): - model_bools = [] - native_bools = [] - if "CA" in filter: # backbone filter - model_atom_ids = model_info - native_atom_ids = native_info - - n_atoms_per_res = [] - for model_ids, native_ids in zip(model_atom_ids, native_atom_ids): - model_select = [ - 1 if atom in filter and atom in native_ids else 0 - for atom in list(dict.fromkeys(model_ids)) - ] - native_select = [ - 1 if atom in filter and atom in model_ids else 0 - for atom in list(dict.fromkeys(native_ids)) - ] - model_bools.extend(model_select) - native_bools.extend(native_select) - n_atoms_per_res.append(sum(native_select)) - if sum(model_select) != sum(native_select): - print(model_ids, model_select) - print(native_ids, native_select) - print("ERROR!") - else: # filter by index - n_atoms_per_res = model_info - - for i, n_atoms in enumerate(n_atoms_per_res): - if i in filter: - model_bools.extend([1 for i in range(n_atoms)]) - else: - model_bools.extend([0 for i in range(n_atoms)]) - native_bools = model_bools - model_bools = np.array(model_bools).astype(bool) - native_bools = np.array(native_bools).astype(bool) - return model_bools, native_bools, tuple(n_atoms_per_res) - - # @profile def calc_DockQ( sample_chains, @@ -544,39 +384,6 @@ def format_alignment(aln): return alignment -def remove_hetatms(model): - chains = [chain.id for chain in model.get_chains()] - residues_to_delete = [] - - for chain in chains: - residues = model[chain].get_residues() - - for res in residues: - if res.id[0] != " ": - residues_to_delete.append(res.get_full_id()) - for _, _, chain, res in residues_to_delete: - model[chain].detach_child(res) - - for chain in chains: - if not model[chain]: - model.detach_child(chain) - - -def remove_h(model): - chains = [chain.id for chain in model.get_chains()] - atoms_to_delete = [] - - for chain in chains: - residues = model[chain].get_residues() - for residue in residues: - for atom in residue.get_atoms(): - if atom.element == "H": - atoms_to_delete.append(atom.get_full_id()) - - for _, _, chain, res, atom in atoms_to_delete: - model[chain][res].detach_child(atom[0]) - - @lru_cache def list_atoms_per_residue(chain, what): n_atoms_per_residue = []