Skip to content

Commit

Permalink
createFoldseekAlignment
Browse files Browse the repository at this point in the history
  • Loading branch information
karolamik13 committed Nov 10, 2024
1 parent 1260011 commit f2956d9
Showing 1 changed file with 69 additions and 1 deletion.
70 changes: 69 additions & 1 deletion prody/proteins/interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@
'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory',
'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues',
'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues',
'saveInteractionsAsDummyAtoms', 'extractMultiModelPDB', 'calcSignatureInteractions']
'saveInteractionsAsDummyAtoms', 'createFoldseekAlignment',
'extractMultiModelPDB', 'calcSignatureInteractions']


def cleanNumbers(listContacts):
Expand Down Expand Up @@ -3239,6 +3240,73 @@ def showSminaTermValues(data):
return show


def createFoldseekAlignment(prot_seq, prot_foldseek, **kwargs):
    """Aligns sequences from prot_seq with homologous sequences identified in prot_foldseek,
    generating a multiple sequence alignment.

    The first line of the output file is the natural sequence. Every accepted
    foldseek hit adds one line of the same length, in which the hit residues are
    placed at the position where the ungapped aligned query segment occurs in the
    natural sequence, and all other positions are filled with ``-``.

    Only the first line of *prot_seq* is read. Each line of *prot_foldseek* is
    split on whitespace; column 3 is compared against 0.5 (lines below it are
    ignored), column 12 is the aligned segment that is searched for in the
    natural sequence and column 13 is the aligned segment that is written out.
    NOTE(review): presumably these are the identity, query alignment and target
    alignment columns of a custom foldseek output format -- confirm against the
    command used to generate the file.

    Blank lines and hits whose ungapped segment cannot be found in the natural
    sequence are skipped.

    :arg prot_seq: The natural sequence extracted from the PDB (seq file)
    :type prot_seq: str

    :arg prot_foldseek: The results from foldseek (foldseek file)
    :type prot_foldseek: str

    :arg msa_output_name: Name of the output file with the alignment (msa file)
        Default is ``'prot_struc.msa'``
    :type msa_output_name: str

    :raises ValueError: if the sequence file is empty or a foldseek line has
        an aligned segment in column 13 shorter than the one in column 12
    """

    # Keyword arguments must be collected with ``**``; with a single ``*`` they
    # arrive as a tuple, which has no ``pop`` method.
    msa_output_name = kwargs.pop('msa_output_name', 'prot_struc.msa')

    # Read both inputs before the output file is created, so a bad input
    # does not leave a half-written alignment behind.
    with open(prot_seq, 'r') as f:
        seq_lines = f.readlines()

    if not seq_lines:
        raise ValueError('sequence file is empty: {0}'.format(prot_seq))

    with open(prot_foldseek, 'r') as f:
        foldseek_lines = f.readlines()

    nat_seq = seq_lines[0].strip()
    nat_len = len(nat_seq)

    # The natural sequence is always the first row of the alignment.
    rows = [nat_seq]

    for line in foldseek_lines:
        entries = line.split()

        # Tolerate empty lines (e.g. a trailing newline at the end of the file).
        if not entries:
            continue

        if float(entries[2]) < 0.5:
            continue

        tar_seq = entries[11]
        mat_seq = entries[12]

        if len(mat_seq) < len(tar_seq):
            raise ValueError('aligned segments differ in length for line: '
                             '{0}'.format(line.strip()))

        # Drop the alignment columns that are gaps in the query segment, so
        # that the remaining columns map one-to-one onto natural residues.
        kept = [(t, m) for t, m in zip(tar_seq, mat_seq) if t != '-']
        tar_nogap = ''.join(t for t, _ in kept)
        processed_mat = ''.join(m for _, m in kept)

        match_index = nat_seq.find(tar_nogap)

        # str.find returns -1 when there is no match; using it as an offset
        # would produce a shifted, truncated row, so such hits are left out.
        if match_index < 0:
            continue

        trailing = nat_len - match_index - len(processed_mat)
        rows.append('-' * match_index + processed_mat + '-' * trailing)

    with open(msa_output_name, 'w') as fp:
        fp.write('\n'.join(rows) + '\n')


def extractMultiModelPDB(multimodelPDB, **kwargs):
"""Extracts individual PDB models from multimodel PDB and places them into the pointed directory.
If used for calculating calcSignatureInteractions align the models.
Expand Down

0 comments on commit f2956d9

Please sign in to comment.