Skip to content

Commit

Permalink
added python3 compatibility (for dlpisces.py and download_pdbs.py), f…
Browse files Browse the repository at this point in the history
…ixed numerous packaging bugs preventing pip installation, and made the code slightly more modular.
  • Loading branch information
jewettaij committed Jan 25, 2021
1 parent c4613c9 commit 19a95fd
Show file tree
Hide file tree
Showing 9 changed files with 367 additions and 134 deletions.
11 changes: 6 additions & 5 deletions dlpdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@
# Hence, only a few of the modules contain useful functions that can be
# accessed from within python. They are below:
from .resid import ResID
from .closest_points import ClosestPoints
from .closest_line_points import ClosestLinePoints
from .coords2angles import Coords2AnglesLengths, Coords2Angles
from .coords2dihedrals import Coords2DihedralsAnglesLengths, Coords2Dihedrals
from .coords2dihedrals_inner import Coords2DihedralsLengthsInner, Coords2DihedralsInner
from .coords2dihedrals_inner import Coords2DihedralsInnerLengths, Coords2DihedralsInner
from .helixAngleOmega import CalcOmegaFromThetaPhi, CalcOmega

# I no longer remember why I import "main" from the executable scripts.
# Perhaps these next few lines are unnecessary, but they seem to do no harm:
from .coords2angles import main
from .coords2dihedrals_inner import main
from .coords2dihedrals import main
from .coords2distances import main
from .coords2helixAngleOmega.py import main
from .coords2helixAngleOmega import main
from .dlpdbfile import main
from .dlpisces import main
from .dna_interleave_residues import main
from .download_pdbs import main
Expand All @@ -26,7 +28,6 @@
from .has_secondary_str import main
from .has_sheets import main
from .has_turns import main
from .helixAngleOmega import main
from .merge_lines_periodic import main
from .pdb2coords_ave import main
from .pdb2coords import main
Expand All @@ -46,7 +47,7 @@
'coords2dihedrals_inner',
'coords2dihedrals',
'coords2distances',
'coords2helixAngleOmega.py',
'coords2helixAngleOmega',
'dlpisces',
'dna_interleave_residues',
'download_pdbs',
Expand Down
2 changes: 1 addition & 1 deletion dlpdb/closest_points.py → dlpdb/closest_line_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def PrintVect(v):
sys.stdout.write('\n')


def ClosestPoints(ra0, rb0, va, vb):
def ClosestLinePoints(ra0, rb0, va, vb):
"""
Return the closest pair of points in 3D along two lines of infinite length
pointing in directions va,vb and passing through points ra0,rb0.
Expand Down
4 changes: 2 additions & 2 deletions dlpdb/coords2dihedrals_inner.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@
import sys
from math import sqrt, cos, sin, tan, acos, asin, atan, pi, floor
try:
from .closest_points import ClosestPoints
from .closest_line_points import ClosestLinePoints
from .coords2dihedrals import Coords2DihedralsAnglesLengths,Coords2Dihedrals
except (ImportError, SystemError, ValueError):
# not installed as a package
from closest_points import ClosestPoints
from closest_line_points import ClosestLinePoints
from coords2dihedrals import Coords2DihedralsAnglesLengths,Coords2Dihedrals


Expand Down
92 changes: 7 additions & 85 deletions dlpdb/coords2helixAngleOmega.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,16 @@
"""


from math import sqrt, cos, sin, tan, acos, asin, atan, pi
import sys
from helixAngleOmega import CalcOmega



def length_v(r):
lsqd = 0.0
for d in range(0,len(r)):
lsqd += r[d]*r[d]
return sqrt(lsqd)

from math import sqrt, cos, sin, tan, acos, asin, atan, pi
try:
from .helixAngleOmega import CalcOmegaFromThetaPhi, CalcOmega
except (ImportError, SystemError, ValueError):
# not installed as a package
from helixAngleOmega import CalcOmegaFromThetaPhi, CalcOmega

def inner_prod_v(r1,r2):
result = 0.0
for d in range(0,len(r1)):
result += r1[d]*r2[d]
return result


def cross_prod_v3(a,b):
c = [0.0,0.0,0.0]
c[0] = a[1]*b[2] - a[2]*b[1]
c[1] = a[2]*b[0] - a[0]*b[2]
c[2] = a[0]*b[1] - a[1]*b[0]
return c


def main():
Expand Down Expand Up @@ -83,69 +67,7 @@ def main():

if (len(r_i[i])==3)and(len(r_i[i+1])==3)and(len(r_i[i+2])==3)and(len(r_i[i+3])==3):

r10 = [0.0, 0.0, 0.0]
r21 = [0.0, 0.0, 0.0]
r32 = [0.0, 0.0, 0.0]
for d in range(0,3):
r10[d] = r_i[i+1][d] - r_i[i][d]
r21[d] = r_i[i+2][d] - r_i[i+1][d]
r32[d] = r_i[i+3][d] - r_i[i+2][d]
l10 = length_v(r10)
l21 = length_v(r21)
l32 = length_v(r32)

n012 = cross_prod_v3(r10, r21)
n123 = cross_prod_v3(r21, r32)


# The torsion-angle or 4-body angle is named "angle0124"
cos_phi = inner_prod_v(n012, n123) /(length_v(n012)*length_v(n123))

# There is a problem whenever 4 consecutive atoms are coplanar:
#
# *---*
# | (all 4 atoms are coplanar, and phi = 0)
# *---*
#
# In this example, the torsion angle phi is well defined and =0.
# The problem is that, due to floating point roundoff
# "cos_phi" can sometimes slightly exceed 1.
# This causes a NAN when you calculate acos(cos_phi).

if (cos_phi > 1.0):
cos_phi = 1.0
elif (cos_phi < -1.0):
cos_phi = -1.0

phi = acos(cos_phi)

# This formula does not distinguish positive and negative phi.
#
# Negative torsion angles:
#
# Check if the position of atom i+3 is above the phi=0 plane
# (in the region of positive phi), or below the phi=0 plane.
# It is above the phi=0 plane if the bond from atom i+2 to i+3
# points in the same direction as the negative-phi-tangent-vector
# for atom i (not i+3) (...which points in the n012 direction)
if inner_prod_v(n012, r32) < 0.0:
phi = -phi

# The two bond-angles or 3-body angles are named "angle012" and "angle123"
angle012 = acos( -inner_prod_v(r10, r21) / (l10 * l21) )
angle123 = acos( -inner_prod_v(r21, r32) / (l21 * l32) )
# (The negative sign above comes from the fact that we are really
# interested in the angle between r21 and r01 (which is -r10).)

# Convert these angles to degrees, and print it out
#sys.stdout.write(str(angle012*180.0/pi)+' ')
#sys.stdout.write(str(angle123*180.0/pi)+' ')

# Let theta = the average of the two 3-body angles
theta = 0.5 * (angle012 + angle123)

# Omega (usually 360/3.6 ~= 100 degrees) is the helix rotation angle.
Omega = CalcOmega(theta, phi)
Omega = CalcOmega(r_i[i], r_i[i+1], r_i[i+2], r_i[i+3])

sys.stdout.write(str(Omega*180.0/pi))

Expand Down
218 changes: 218 additions & 0 deletions dlpdb/dlpdbfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
#!/usr/bin/env python

"""
Typical usage (example):
dlpdbfile.py < list_of_pdb_codes.txt
Where the input file contains a list of pdb-codes, one per line:
100d
101d
102d
103d
104d
107d
108d
109d
Every line in this file contains a 4-letter pdb identifier.
Each 4-letter pdb code is compared with a list of pdb codes downloaded so far
(which is initially read from the file "pdbs_most_recent.txt", if present).
If it is new 4-letter pdb code, then the corresponding pdb file is downloaded,
and appended to the end of the "pdbs_most_recent.txt" file.
The corresponding DSSP file is also downloaded, if available.
"""

# author: Andrew Jewett
g_program_name = __file__.split('/')[-1]
g_date_str = '2021-1-24'
g_version_str = '0.6.0'


import sys, urllib, urllib.request, random, time, gzip


def FileExists(fname):
try:
file = open(fname, 'r')
except IOError:
exists = False
else:
exists = True
return exists



def ExtractFileName(url):
# The name of the file we create will be the same as the last few
# characters of the URL. (Whatever comes after the last '/' character.)
out_fname = url
slash_loc = out_fname.rfind('/')
if (slash_loc >= 0): # if '/' was found, then remove text preceeding '/'
out_fname = out_fname[slash_loc+1:]
if (out_fname == ''):
sys.stderr.write('Error: filename difficult to determine from URL:\n' +\
' \"'+url+'\"\n')
sys.exit(-1)
return out_fname



# This function downloads a file from a URL, and (if requested by the user)
# prints out a warning message if there was a problem.

import os

def DownloadFileTo(url, file_name, verbose_mode = True):
# Open the input file (at url) for reading
# Simple way:
# in_file = urllib.urlopen(url)
# But the web site may return garbage if the file doesn't exist.
# Instead, we check for errors first:
try: in_file = urllib.request.urlopen(url)
except urllib.error.URLError as e:
if (verbose_mode):
sys.stderr.write(str(e)+'\n')
sys.stderr.write(' omitting file \"'+file_name+'\"\n')
else:
if (verbose_mode):
sys.stderr.write('downloading file \"'+file_name+'\"\n')
# Open the output file for writing
out_file = open(file_name, 'wb')
out_file.write(in_file.read())
out_file.close()



# This version infers the file name from the URL.
def DownloadFile(url, verbose_mode = True):
DownloadFileTo(url, ExtractFileName(url), verbose_mode)




# Read in the list of PDB files already downloaded:

pdbs_old = set([])
try:
pdbs_old_file = open('pdbs_old.txt', 'r')
for pdb_code in pdbs_old_file:
pdb_code = pdb_code.strip()#eliminate trailing and preceeding whitespace
pdb_code = pdb_code.lower()#pdb codes are case-insensitive
if (len(pdb_code) != 4):
sys.stderr.write('Error in in \"pdbs_old.txt\":\n')
sys.stderr.write(' Invalid PDB-code: \"'+pdb_code+'\"\n')
exit(-1)
pdbs_old.add(pdb_code)
pdbs_old_file.close()
except IOError:
pass




def main():
# Here we keep track of the list of pdb files which
# i) are in the current list of pdb files requested in
# sys.stdin
# ii) are in the current list, but are not downloaded yet
# (new pdb files)
pdbs_current_file = open('pdbs_most_recent.txt', 'w')
pdbs_current = set([]) #entire list of pdb codes requested
pdbs_new = set([]) #a list of new pdb codes requested that were not in the old list
pdbs_old_file = open('pdbs_old.txt', 'a')


for line in sys.stdin:
line = line.strip()
if len(line) == 0:
continue
pdb_code = line.lower() #(pdb codes are case-insensitive)

if (pdb_code in pdbs_new):
sys.stderr.write(pdb_code+' appears redundantly. skipping\n')
elif (pdb_code in pdbs_old):
if (not (pdb_code in pdbs_current)):
sys.stderr.write(pdb_code+' downloaded already. skipping.\n')
pdbs_current.add(pdb_code)
pdbs_current_file.write(pdb_code+'\n')
pdbs_current_file.flush() #<- necessary in case we get interrupted
else:
sys.stderr.write(pdb_code+' appears redundantly. skipping\n')

else:
#Download the corresponding pdb file
file_name = pdb_code+'.pdb.gz' # <- these are compressed files
url = 'http://www.rcsb.org/pdb/files/'+file_name
# Note, if the URL above files, try this one instead:
# url = 'https://files.rcsb.org/download/'+file_name
DownloadFileTo(url, file_name)

#Unzip the pdb file:
with gzip.open(file_name, 'rb') as f:
file_content = f.read()
f.close()
f = open(pdb_code+'.pdb', 'wb')
f.write(file_content)
if not FileExists(pdb_code+'.pdb'):
sys.stderr.write('Error: A problem occured when trying to download PDB code \"'+pdb_code+'\"\n'
' Delete this entry from the file \"pdbs_old.txt\", and rerun '+g_program_name+'\n')
sys.exit(-1)

#Optional: Download the corresponding DSSP file
url = 'ftp://ftp.cmbi.ru.nl/pub/molbio/data/dssp/'+pdb_code+'.dssp'
DownloadFile(url)
if not FileExists(pdb_code+'.dssp'):
sys.stderr.write(" (The old DSSP PDB is server flaking out again.\n"
" Don't worry. DSSP files are not needed.)\n")

#Keep track of the the pdbs we have downloaded so far:

pdbs_current.add(pdb_code)
pdbs_current_file.write(pdb_code+'\n')
pdbs_current_file.flush() #<- necessary in case we get interrupted
pdbs_old_file.write(pdb_code+'\n')
pdbs_old_file.flush() #<- necessary in case we get interrupted
pdbs_new.add(pdb_code)


pdbs_current_file.close()

def ChainFromPDBfile(chainID, pdb_file):
for line in pdb_file:
line_type = line[0:6]
if line_type in set(['ATOM ', 'HETATM', 'ANISOU', 'SIGATM', 'SIGUIJ']):
if line[21:22] == chainID:
yield line
elif line_type == "HET ":
if line[12:13] == chainID:
yield line
elif line_type == "HELIX ":
initChainID = line[19:20]
endChainID = line[31:32]
if (initChainID == chainID) or (endChainID == chainID):
yield line
elif line_type == "SHEET ":
initChainID = line[21:22]
endChainID = line[32:33]
if (initChainID == chainID) or (endChainID == chainID):
yield line
elif line_type == "TURN ":
initChainID = line[19:20]
endChainID = line[30:31]
if (initChainID == chainID) or (endChainID == chainID):
yield line
elif line_type == "SEQRES":
if line[11:12] == chainID:
yield line
elif line_type == "TER ":
ter_chainID = line[21:22]
if (ter_chainID == chainID) or (ter_chainID == " "):
yield line
else:
yield line
return


if __name__ == "__main__":
main()
Loading

0 comments on commit 19a95fd

Please sign in to comment.