Skip to content

Commit

Permalink
Merge branch 'prody:master' into interactions
Browse files Browse the repository at this point in the history
  • Loading branch information
karolamik13 authored Nov 9, 2023
2 parents 05b6fdf + 0a0c500 commit cfa2128
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 43 deletions.
19 changes: 11 additions & 8 deletions prody/proteins/ciffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from .cifheader import getCIFHeaderDict
from .header import buildBiomolecules, assignSecstr, isHelix, isSheet

from string import ascii_uppercase

__all__ = ['parseMMCIFStream', 'parseMMCIF', 'parseCIF']


Expand Down Expand Up @@ -86,7 +88,7 @@ def parseMMCIF(pdb, **kwargs):
auto_bonds = SETTINGS.get('auto_bonds')
get_bonds = kwargs.get('bonds', auto_bonds)
if get_bonds:
LOGGER.warn('Parsing struct_conn information from mmCIF is current unsupported and no bond information is added to the results')
LOGGER.warn('Parsing struct_conn information from mmCIF is currently unsupported and no bond information is added to the results')
if not os.path.isfile(pdb):
if len(pdb) == 5 and pdb.isalnum():
if chain is None:
Expand All @@ -105,8 +107,12 @@ def parseMMCIF(pdb, **kwargs):

if os.path.isfile(pdb + '.cif'):
filename = pdb + '.cif'
LOGGER.debug('CIF file is found in working directory ({0}).'
.format(filename))
elif os.path.isfile(pdb + '.cif.gz'):
filename = pdb + '.cif.gz'
LOGGER.debug('CIF file is found in working directory ({0}).'
.format(filename))
else:
filename = fetchPDB(pdb, report=True,
format='cif', compressed=False)
Expand Down Expand Up @@ -300,6 +306,7 @@ def _parseMMCIFLines(atomgroup, lines, model, chain, subset,
doneAtomBlock = False
start = 0
stop = 0
warnedAltloc = False
while not doneAtomBlock:
line = lines[i]
if line[:11] == '_atom_site.':
Expand Down Expand Up @@ -431,7 +438,7 @@ def _parseMMCIFLines(atomgroup, lines, model, chain, subset,
continue

alt = line.split()[fields['label_alt_id']]
if alt not in which_altlocs and which_altlocs != 'all':
if not (alt in which_altlocs or ascii_uppercase[int(alt)-1] in which_altlocs) and which_altlocs != 'all':
continue

if alt == '.':
Expand Down Expand Up @@ -505,12 +512,8 @@ def _parseMMCIFLines(atomgroup, lines, model, chain, subset,

anisou = None
siguij = None
try:
data = parseSTARSection(lines, "_atom_site_anisotrop")
x = data[0] # check if data has anything in it
except IndexError:
LOGGER.warn("No anisotropic B factors found")
else:
data = parseSTARSection(lines, "_atom_site_anisotrop", report=False)
if len(data) > 0:
anisou = np.zeros((acount, 6),
dtype=float)

Expand Down
40 changes: 20 additions & 20 deletions prody/proteins/cifheader.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,8 @@ def _getBiomoltrans(lines):
# 2 blocks are needed for this:
# _pdbx_struct_assembly_gen: what to apply to which chains
# _pdbx_struct_oper_list: everything else
data1 = parseSTARSection(lines, '_pdbx_struct_assembly_gen')
data2 = parseSTARSection(lines, '_pdbx_struct_oper_list')
data1 = parseSTARSection(lines, '_pdbx_struct_assembly_gen', report=False)
data2 = parseSTARSection(lines, '_pdbx_struct_oper_list', report=False)

# extracting the data
for n, item1 in enumerate(data1):
Expand Down Expand Up @@ -225,7 +225,7 @@ def _getRelatedEntries(lines):

try:
key = "_pdbx_database_related"
data = parseSTARSection(lines, key)
data = parseSTARSection(lines, key, report=False)
for item in data:
dbref = DBRef()
dbref.accession = item[key + ".db_id"]
Expand Down Expand Up @@ -715,8 +715,8 @@ def _getReference(lines):

# JRNL double block. Blocks 6 and 7 as copied from COMPND
# Block 1 has most info. Block 2 has author info
items1 = parseSTARSection(lines, "_citation")
items2 = parseSTARSection(lines, "_citation_author")
items1 = parseSTARSection(lines, "_citation", report=False)
items2 = parseSTARSection(lines, "_citation_author", report=False)

for row in items1:
for k, value in row.items():
Expand Down Expand Up @@ -767,7 +767,7 @@ def _getPolymers(lines):
entities = defaultdict(list)

# SEQRES block
items1 = parseSTARSection(lines, '_entity_poly')
items1 = parseSTARSection(lines, '_entity_poly', report=False)

for item in items1:
chains = item['_entity_poly.pdbx_strand_id']
Expand All @@ -781,7 +781,7 @@ def _getPolymers(lines):
'_entity_poly.pdbx_seq_one_letter_code_can'].replace(';', '').split())

# DBREF block 1
items2 = parseSTARSection(lines, '_struct_ref')
items2 = parseSTARSection(lines, '_struct_ref', report=False)

for item in items2:
entity = item["_struct_ref.id"]
Expand All @@ -798,7 +798,7 @@ def _getPolymers(lines):
poly.dbrefs.append(dbref)

# DBREF block 2
items3 = parseSTARSection(lines, "_struct_ref_seq")
items3 = parseSTARSection(lines, "_struct_ref_seq", report=False)

for i, item in enumerate(items3):
i += 1
Expand Down Expand Up @@ -884,7 +884,7 @@ def _getPolymers(lines):
last = temp

# MODRES block
data4 = parseSTARSection(lines, "_pdbx_struct_mod_residue")
data4 = parseSTARSection(lines, "_pdbx_struct_mod_residue", report=False)

for data in data4:
ch = data["_pdbx_struct_mod_residue.label_asym_id"]
Expand All @@ -904,7 +904,7 @@ def _getPolymers(lines):
data["_pdbx_struct_mod_residue.details"]))

# SEQADV block
data5 = parseSTARSection(lines, "_struct_ref_seq_dif")
data5 = parseSTARSection(lines, "_struct_ref_seq_dif", report=False)

for i, data in enumerate(data5):
ch = data["_struct_ref_seq_dif.pdbx_pdb_strand_id"]
Expand Down Expand Up @@ -964,8 +964,8 @@ def _getPolymers(lines):

# COMPND double block.
# Block 6 has most info. Block 7 has synonyms
data6 = parseSTARSection(lines, "_entity")
data7 = parseSTARSection(lines, "_entity_name_com")
data6 = parseSTARSection(lines, "_entity", report=False)
data7 = parseSTARSection(lines, "_entity_name_com", report=False)

dict_ = {}
for molecule in data6:
Expand Down Expand Up @@ -1045,7 +1045,7 @@ def _getChemicals(lines):
# The 1st block we need has info about location in the structure

# this instance only includes single sugars not branched structures
items = parseSTARSection(lines, "_pdbx_nonpoly_scheme")
items = parseSTARSection(lines, "_pdbx_nonpoly_scheme", report=False)

for data in items:
resname = data["_pdbx_nonpoly_scheme.mon_id"]
Expand All @@ -1064,7 +1064,7 @@ def _getChemicals(lines):
chemicals[chem.resname].append(chem)

# next we get the equivalent block for the branched sugars
items = parseSTARSection(lines, "_pdbx_branch_scheme")
items = parseSTARSection(lines, "_pdbx_branch_scheme", report=False)

for data in items:
resname = data["_pdbx_branch_scheme.mon_id"]
Expand All @@ -1080,7 +1080,7 @@ def _getChemicals(lines):
chemicals[chem.resname].append(chem)

# 2nd block to get has general info e.g. name and formula
items = parseSTARSection(lines, "_chem_comp")
items = parseSTARSection(lines, "_chem_comp", report=False)

for data in items:
resname = data["_chem_comp.id"]
Expand Down Expand Up @@ -1155,7 +1155,7 @@ def _getTitle(lines):
title = ''

try:
data = parseSTARSection(lines, "_struct")
data = parseSTARSection(lines, "_struct", report=False)
for item in data:
title += item['_struct.title'].upper()
except:
Expand All @@ -1172,7 +1172,7 @@ def _getAuthors(lines):
authors = []

try:
data = parseSTARSection(lines, "_audit_author")
data = parseSTARSection(lines, "_audit_author", report=False)
for item in data:
author = ''.join(item['_audit_author.name'].split(', ')[::-1])
authors.append(author.upper())
Expand All @@ -1192,7 +1192,7 @@ def _getSplit(lines):
key = "_pdbx_database_related"

try:
data, _ = parseSTARSection(lines, key)
data, _ = parseSTARSection(lines, key, report=False)
for item in data:
if item[key + '.content_type'] == 'split':
split.append(item[key + '.db_id'])
Expand Down Expand Up @@ -1227,7 +1227,7 @@ def _getOther(lines, key=None):
data = []

try:
data = parseSTARSection(lines, key)
data = parseSTARSection(lines, key, report=False)
except:
pass

Expand All @@ -1242,7 +1242,7 @@ def _getUnobservedSeq(lines):
key_unobs = '_pdbx_unobs_or_zero_occ_residues'

try:
unobs = parseSTARSection(lines, key_unobs)
unobs = parseSTARSection(lines, key_unobs, report=False)
polymers = _getPolymers(lines)
except:
pass
Expand Down
14 changes: 12 additions & 2 deletions prody/proteins/emdfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,12 @@ def parseEMD(emd, **kwargs):

if os.path.isfile(emd + '.map'):
filename = emd + '.map'
LOGGER.debug('EMD file is found in working directory ({0}).'
.format(filename))
elif os.path.isfile(emd + '.map.gz'):
filename = emd + '.map.gz'
LOGGER.debug('EMD file is found in working directory ({0}).'
.format(filename))
else:
filename = fetchPDB(emd, report=True,
format='emd', compressed=False)
Expand All @@ -91,6 +95,14 @@ def parseEMD(emd, **kwargs):
result = parseEMDStream(emdStream, **kwargs)
emdStream.close()

if hasattr(result, 'numAtoms'):
LOGGER.info('Output is an AtomGroup with {0} atoms fitted.'.format(result.numAtoms()))
elif hasattr(result, 'apix'):
LOGGER.info('Output is an EMDMAP with {:4.2f} A/pix.'.format(result.apix[0]))
else:
LOGGER.warn('Atomic data could not be parsed, please '
'check the input file.')

return result


Expand Down Expand Up @@ -128,8 +140,6 @@ def parseEMDStream(stream, **kwargs):
else:
make_nodes = False
map = True
LOGGER.info('As n_nodes is less than or equal to 0, no nodes will be'
' made and the raw map will be returned')

emd = EMDMAP(stream, min_cutoff, max_cutoff)

Expand Down
25 changes: 18 additions & 7 deletions prody/proteins/localpdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,16 +212,15 @@ def fetchPDB(*pdb, **kwargs):
if len(pdb) == 1 and isinstance(pdb[0], list):
pdb = pdb[0]

if 'format' in kwargs and kwargs.get('format') != 'pdb':
return fetchPDBviaFTP(*pdb, **kwargs)

identifiers = checkIdentifiers(*pdb)

folder = kwargs.get('folder', '.')
compressed = kwargs.get('compressed')
format_ = kwargs.get('format')

# check *folder* specified by the user, usually pwd ('.')
filedict = findPDBFiles(folder, compressed=compressed)
filedict = findPDBFiles(folder, compressed=compressed,
format=format_)

filenames = []
not_found = []
Expand All @@ -240,8 +239,8 @@ def fetchPDB(*pdb, **kwargs):
if len(filenames) == 1:
filenames = filenames[0]
if exists:
LOGGER.debug('PDB file is found in working directory ({0}).'
.format(sympath(filenames)))
LOGGER.debug('{0} file is found in working directory ({1}).'
.format(format_.upper(), sympath(filedict[pdb])))
return filenames

if not isWritable(folder):
Expand Down Expand Up @@ -414,6 +413,8 @@ def iterPDBFilenames(path=None, sort=False, unique=True, **kwargs):

from re import compile, IGNORECASE

format = kwargs.get('format')

if path is None or kwargs.get('mirror') is True:
if path is None:
path = pathPDBMirror()
Expand All @@ -436,10 +437,20 @@ def iterPDBFilenames(path=None, sort=False, unique=True, **kwargs):
compressed = kwargs.get('compressed')
if compressed is None:
pdbext = compile('\.(pdb|ent)(\.gz)?$', IGNORECASE)
cifext = compile('\.(cif)(\.gz)?$', IGNORECASE)
emdext = compile('\.(emd|map|mrc)(\.gz)?$', IGNORECASE)
elif compressed:
pdbext = compile('\.(pdb|ent)\.gz$', IGNORECASE)
cifext = compile('\.(cif)\.gz$', IGNORECASE)
emdext = compile('\.(emd|map|mrc)\.gz$', IGNORECASE)
else:
pdbext = compile('\.(pdb|ent)$', IGNORECASE)
cifext = compile('\.(cif)$', IGNORECASE)
emdext = compile('\.(emd|map|mrc)$', IGNORECASE)
if format == 'cif':
pdbext = cifext
if format == 'emd':
pdbext = emdext
pdbs = [pdb for pdb in iglob(join(path, '*')) if pdbext.search(pdb)]
if sort:
pdbs.sort(reverse=kwargs.get('reverse'))
Expand Down Expand Up @@ -476,7 +487,7 @@ def findPDBFiles(path, case=None, **kwargs):
pdb = splitext(split(fn)[1])[0]
ending = splitext(splitext(split(fn)[1])[0])[1]
if ending == 'gz':
pdb = splittext(pdb)[0]
pdb = splitext(pdb)[0]
if len(pdb) == 7 and pdb.startswith('pdb'):
pdb = pdb[3:]
if upper:
Expand Down
3 changes: 3 additions & 0 deletions prody/proteins/mmtffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ def set_info(atomgroup, mmtf_data,get_bonds=False,altloc_sel='A'):
if altloc_sel != 'all':
#mask out any unwanted alternative locations
mask = (altlocs == '') | (altlocs == altloc_sel)

if np.all(mask == False):
mask = (altlocs == '') | (altlocs == altlocs[0])

atomgroup.setCoords(coords[:,mask])
atomgroup.setNames(atom_names[mask])
Expand Down
11 changes: 7 additions & 4 deletions prody/proteins/pdbfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1242,8 +1242,7 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
Default is **False**, which means using hexadecimal instead.
NB: ChimeraX seems to prefer hybrid36 and may have problems with hexadecimal.
:type hybrid36: bool
"""
initialACSI = atoms.getACSIndex()
"""
renumber = kwargs.get('renumber', True)

remark = str(atoms)
Expand All @@ -1262,8 +1261,10 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
if coordsets is None:
raise ValueError('atoms does not have any coordinate sets')

had_atoms = False
try:
acsi = atoms.getACSIndex()
had_atoms = True
except AttributeError:
try:
atoms = atoms.getAtoms()
Expand Down Expand Up @@ -1438,7 +1439,8 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
num_ter_lines = 0
for m, coords in enumerate(coordsets):

atoms.setACSIndex(m)
if had_atoms:
atoms.setACSIndex(m)
anisous = atoms._getAnisous()
if anisous is not None:
anisous = np.array(anisous * 10000, dtype=int)
Expand Down Expand Up @@ -1614,7 +1616,8 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):

write('END ' + " "*74 + '\n')

atoms.setACSIndex(initialACSI)
if had_atoms:
atoms.setACSIndex(acsi)

writePDBStream.__doc__ += _writePDBdoc

Expand Down
5 changes: 3 additions & 2 deletions prody/proteins/starfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1026,7 +1026,7 @@ def parseImagesFromSTAR(particlesSTAR, **kwargs):
return np.array(images), parsed_images_data


def parseSTARSection(lines, key):
def parseSTARSection(lines, key, report=True):
"""Parse a section of data from *lines* from a STAR file
corresponding to a *key* (part before the dot).
This can be a loop or data block.
Expand Down Expand Up @@ -1077,7 +1077,8 @@ def parseSTARSection(lines, key):
else:
data = [loop_dict["data"]]
else:
LOGGER.warn("Could not find {0} in lines.".format(key))
if report:
LOGGER.warn("Could not find {0} in lines.".format(key))
return []

return data
Loading

0 comments on commit cfa2128

Please sign in to comment.