From 92c971a0ff1acd96ae428ce600e6c9476e741253 Mon Sep 17 00:00:00 2001
From: Zhuoran Qiao <zqiao@caltech.edu>
Date: Tue, 26 Apr 2022 00:11:44 -0700
Subject: [PATCH 1/5] fixing the bond matching algorithm

---
 prody/atomic/atomgroup.py |  22 +++----
 prody/proteins/pdbfile.py | 128 ++++++++++++++++++++++----------------
 2 files changed, 84 insertions(+), 66 deletions(-)

diff --git a/prody/atomic/atomgroup.py b/prody/atomic/atomgroup.py
index 2763d5862..3f39f06f6 100644
--- a/prody/atomic/atomgroup.py
+++ b/prody/atomic/atomgroup.py
@@ -1095,8 +1095,8 @@ def setBonds(self, bonds):
         index 1"``.  See :mod:`.select` module documentation for details.
         Also, a data array with number of bonds will be generated and stored
         with label *numbonds*.  This can be used in atom selections, e.g.
-        ``'numbonds 0'`` can be used to select ions in a system. If *bonds* 
-        is empty or **None**, then all bonds will be removed for this 
+        ``'numbonds 0'`` can be used to select ions in a system. If *bonds*
+        is empty or **None**, then all bonds will be removed for this
         :class:`.AtomGroup`. """
 
         if bonds is None or len(bonds) == 0:
@@ -1126,7 +1126,7 @@ def setBonds(self, bonds):
         self._fragments = None
 
     def numBonds(self):
-        """Returns number of bonds.  Use :meth:`setBonds` or 
+        """Returns number of bonds.  Use :meth:`setBonds` or
         :meth:`inferBonds` for setting bonds."""
 
         if self._bonds is not None:
@@ -1134,7 +1134,7 @@ def numBonds(self):
         return 0
 
     def getBonds(self):
-        """Returns bonds.  Use :meth:`setBonds` or 
+        """Returns bonds.  Use :meth:`setBonds` or
         :meth:`inferBonds` for setting bonds."""
 
         if self._bonds is not None:
@@ -1171,7 +1171,7 @@ def iterBonds(self):
                 yield Bond(self, bond, acsi)
 
     def _iterBonds(self):
-        """Yield pairs of bonded atom indices. Use :meth:`setBonds` 
+        """Yield pairs of bonded atom indices. Use :meth:`setBonds`
         or `inferBonds` for setting bonds."""
 
         if self._bonds is not None:
@@ -1366,8 +1366,8 @@ def setDonors(self, donors):
         index 1"``.  See :mod:`.select` module documentation for details.
         Also, a data array with number of donors will be generated and stored
         with label *numdonors*.  This can be used in atom selections, e.g.
-        ``'numdonors 0'`` can be used to select ions in a system. If *donors* 
-        is empty or **None**, then all donors will be removed for this 
+        ``'numdonors 0'`` can be used to select ions in a system. If *donors*
+        is empty or **None**, then all donors will be removed for this
         :class:`.AtomGroup`. """
 
         if donors is None or len(donors) == 0:
@@ -1432,8 +1432,8 @@ def setAcceptors(self, acceptors):
         index 1"``.  See :mod:`.select` module documentation for details.
         Also, a data array with number of acceptors will be generated and stored
         with label *numacceptors*.  This can be used in atom selections, e.g.
-        ``'numacceptors 0'`` can be used to select ions in a system. If *acceptors* 
-        is empty or **None**, then all acceptors will be removed for this 
+        ``'numacceptors 0'`` can be used to select ions in a system. If *acceptors*
+        is empty or **None**, then all acceptors will be removed for this
         :class:`.AtomGroup`. """
 
         if acceptors is None or len(acceptors) == 0:
@@ -1498,8 +1498,8 @@ def setNBExclusions(self, nbexclusions):
         index 1"``.  See :mod:`.select` module documentation for details.
         Also, a data array with number of nbexclusions will be generated and stored
         with label *numnbexclusions*.  This can be used in atom selections, e.g.
-        ``'numnbexclusions 0'`` can be used to select ions in a system. If *nbexclusions* 
-        is empty or **None**, then all nbexclusions will be removed for this 
+        ``'numnbexclusions 0'`` can be used to select ions in a system. If *nbexclusions*
+        is empty or **None**, then all nbexclusions will be removed for this
         :class:`.AtomGroup`. """
 
         if nbexclusions is None or len(nbexclusions) == 0:
diff --git a/prody/proteins/pdbfile.py b/prody/proteins/pdbfile.py
index da5eeb553..a3bf68c35 100644
--- a/prody/proteins/pdbfile.py
+++ b/prody/proteins/pdbfile.py
@@ -26,7 +26,7 @@
            'writePDBStream', 'writePDB', 'writeChainsList', 'writePQR',
            'writePQRStream']
 
-MAX_N_ATOM = 99999 
+MAX_N_ATOM = 99999
 MAX_N_RES = 9999
 
 class PDBParseError(Exception):
@@ -71,9 +71,9 @@ class PDBParseError(Exception):
 
     :arg biomol: if **True**, biomolecules are obtained by transforming the
         coordinates using information from header section will be returned.
-        This option uses :func:`.buildBiomolecules` and as noted there, 
-        atoms in biomolecules are ordered according to the original chain 
-        IDs. Chains may have the same chain ID, in which case they are given 
+        This option uses :func:`.buildBiomolecules` and as noted there,
+        atoms in biomolecules are ordered according to the original chain
+        IDs. Chains may have the same chain ID, in which case they are given
         different segment names.
         Default is **False**
     :type biomol: bool
@@ -99,15 +99,15 @@ def parsePDB(*pdb, **kwargs):
 
     :arg pdb: one PDB identifier or filename, or a list of them.
         If needed, PDB files are downloaded using :func:`.fetchPDB()` function.
-    
+
     You can also provide arguments that you would like passed on to fetchPDB().
 
     :arg extend_biomol: whether to extend the list of results with a list
-        rather than appending, which can create a mixed list, 
+        rather than appending, which can create a mixed list,
         especially when biomol=True.
         Default value is False to reproduce previous behaviour.
         This value is ignored when result is not a list (header=True or model=0).
-    :type extend_biomol: bool 
+    :type extend_biomol: bool
     """
     extend_biomol = kwargs.pop('extend_biomol', False)
 
@@ -119,7 +119,7 @@ def parsePDB(*pdb, **kwargs):
         if isListLike(pdb[0]) or isinstance(pdb[0], dict):
             pdb = pdb[0]
             n_pdb = len(pdb)
-            
+
     if n_pdb == 1:
         return _parsePDB(pdb[0], **kwargs)
     else:
@@ -139,7 +139,7 @@ def parsePDB(*pdb, **kwargs):
             for key in lstkwargs:
                 kwargs[key] = lstkwargs[key][i]
             c = kwargs.get('chain','')
-            LOGGER.update(i, 'Retrieving {0}...'.format(p+c), 
+            LOGGER.update(i, 'Retrieving {0}...'.format(p+c),
                           label='_prody_parsePDB')
             result = _parsePDB(p, **kwargs)
             if not isinstance(result, tuple):
@@ -151,7 +151,7 @@ def parsePDB(*pdb, **kwargs):
 
         results = list(zip(*results))
         LOGGER.finish()
-       
+
         for i in reversed(range(len(results))):
             if all(j is None for j in results[i]):
                 results.pop(i)
@@ -216,7 +216,7 @@ def _parsePDB(pdb, **kwargs):
                 try:
                     LOGGER.info("Trying to parse EMD file instead")
                     return parseEMD(pdb+chain, **kwargs)
-                except:                
+                except:
                     raise IOError('PDB file for {0} could not be downloaded.'
                                 .format(pdb))
         pdb = filename
@@ -246,7 +246,7 @@ def _parsePDB(pdb, **kwargs):
             except ValueError:
                 LOGGER.warn("Could not parse anything so returning None")
                 return None
-            except:                
+            except:
                 raise IOError('PDB file for {0} could not be downloaded.'
                               .format(pdb))
 
@@ -257,8 +257,8 @@ def parsePDBStream(stream, **kwargs):
     parsed from a stream of PDB lines.
 
     :arg stream: Anything that implements the method ``readlines``
-        (e.g. :class:`file`, buffer, stdin)""" 
-    
+        (e.g. :class:`file`, buffer, stdin)"""
+
     model = kwargs.get('model')
     header = kwargs.get('header', False)
     assert isinstance(header, bool), 'header must be a boolean'
@@ -326,6 +326,7 @@ def parsePDBStream(stream, **kwargs):
             try:
                 ag.setBonds(bonds)
             except ValueError:
+                raise
                 LOGGER.warn('Bonds read from CONECT records do not apply to subset so were not added')
         if ag.numAtoms() > 0:
             LOGGER.report('{0} atoms and {1} coordinate set(s) were '
@@ -489,6 +490,7 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
     start = split
     stop = len(lines)
     nmodel = 0
+    bonds_serial = []
     # if a specific model is requested, skip lines until that one
     if isPDB and model is not None and model != 1:
         for i in range(split, len(lines)):
@@ -537,7 +539,7 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
             startswith = line[0:6].strip()
         else:
             startswith = fields[0]
-        
+
         if startswith == 'ATOM' or startswith == 'HETATM':
             if isPDB:
                 atomname = line[12:16].strip()
@@ -560,7 +562,7 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
                 if not chid in chain:
                     i += 1
                     continue
-            
+
             if isPDB:
                 alt = line[16]
                 if alt not in which_altlocs and which_altlocs != 'all':
@@ -620,14 +622,14 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
                         LOGGER.warn('failed to parse serial number in line {0}. Assigning it as 1.'
                                     .format(i))
                         serials[acount] = 1
-                    
+
             altlocs[acount] = alt
             atomnames[acount] = atomname
             resnames[acount] = resname
             chainids[acount] = chid
             if isPDB:
                 resnum_str = line[22:26]
-                icode = line[26] 
+                icode = line[26]
                 if dec:
                     try:
                         resnum = int(resnum_str)
@@ -638,7 +640,7 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
                         if not warned_5_digit:
                             LOGGER.warn('parsed 5 digit residue number including numeric insertion code')
                             warned_5_digit = True
-                        resnum = int(str(resnum) + icode)          
+                        resnum = int(str(resnum) + icode)
                     else:
                         icodes[acount] = icode
 
@@ -654,7 +656,7 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
                         else:
                             # lower case is found in hexadecimal PDB files
                             resnum = int(resnum_str, 16)
-                        
+
                     except ValueError:
                         if acount > 0:
                             LOGGER.warn('failed to parse residue number in line {0}. Assigning it by incrementing.'
@@ -758,19 +760,23 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
             if bonds is not None:
                 atom_serial = line[6:11]
                 bonded1_serial = line[11:16]
-                bonds.append([int(atom_serial)-1-num_ters, int(bonded1_serial)-1-num_ters])
-                
+                # bonds.append([int(atom_serial)-1-num_ters, int(bonded1_serial)-1-num_ters])
+                bonds_serial.append([int(atom_serial), int(bonded1_serial)])
+
                 bonded2_serial = line[16:21]
                 if len(bonded2_serial.strip()):
-                    bonds.append([int(atom_serial)-1-num_ters, int(bonded2_serial)-1-num_ters])
+                    # bonds.append([int(atom_serial)-1-num_ters, int(bonded2_serial)-1-num_ters])
+                    bonds_serial.append([int(atom_serial), int(bonded2_serial)])
 
                 bonded3_serial = line[21:26]
                 if len(bonded3_serial.strip()):
-                    bonds.append([int(atom_serial)-1-num_ters, int(bonded3_serial)-1-num_ters])
-                    
-                bonded4_serial = line[27:31]
+                    # bonds.append([int(atom_serial)-1-num_ters, int(bonded3_serial)-1-num_ters])
+                    bonds_serial.append([int(atom_serial), int(bonded3_serial)])
+
+                bonded4_serial = line[26:31]  # fixed typo
                 if len(bonded4_serial.strip()):
-                    bonds.append([int(atom_serial)-1-num_ters, int(bonded4_serial)-1-num_ters])
+                    # bonds.append([int(atom_serial)-1-num_ters, int(bonded4_serial)-1-num_ters])
+                    bonds_serial.append([int(atom_serial), int(bonded4_serial)])
 
         elif not onlycoords and (startswith == 'TER   ' or
             startswith.strip() == 'TER'):
@@ -977,6 +983,18 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
         charges.resize(acount, refcheck=False)
         atomgroup.setCharges(charges)
 
+    # hard-coded patch
+    serial_to_id = {}
+    for aidx in range(len(serials)):
+        serial_to_id[int(serials[aidx])] = aidx
+    # rematch the bond-atom serial numbers to atom ids
+    for bond in bonds_serial:
+        try:
+            bonds.append([serial_to_id[bond[0]], serial_to_id[bond[1]]])
+        except KeyError:
+            LOGGER.warn(f"Bond connecting atom serial numbers {bond[0]}"
+                        f" and {bond[1]} contains atoms not included in the model")
+
     if altloc and altloc_torf:
         _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames)
 
@@ -1045,7 +1063,7 @@ def _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames):
 HELIXLINE = ('HELIX  {serNum:3d} {helixID:>3s} '
              '{initResName:<3s} {initChainID:1s} {initSeqNum:4d}{initICode:1s} '
              '{endResName:<3s} {endChainID:1s} {endSeqNum:4d}{endICode:1s}'
-             '{helixClass:2d}                               {length:5d}\n')             
+             '{helixClass:2d}                               {length:5d}\n')
 
 SHEETLINE = ('SHEET  {strand:3d} {sheetID:>3s}{numStrands:2d} '
              '{initResName:3s} {initChainID:1s}{initSeqNum:4d}{initICode:1s} '
@@ -1113,7 +1131,7 @@ def _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames):
     :arg hybrid36: whether to use hybrid36 format for atoms with serial
         greater than 99999. Hexadecimal is used otherwise.
         Default is False
-    :type hybrid36: bool 
+    :type hybrid36: bool
     """
 
 def writeChainsList(chains, filename):
@@ -1122,7 +1140,7 @@ def writeChainsList(chains, filename):
 
     :arg chains: a list of :class:`.Chain` objects
     :type chains: list
-    
+
     :arg filename: the name of the file to be written
     :type filename: str
     """
@@ -1140,10 +1158,10 @@ def parseChainsList(filename):
     :arg filename: the name of the file to be read
     :type filename: str
 
-    Returns: lists containing an :class:'.AtomGroup' for each PDB, 
+    Returns: lists containing an :class:'.AtomGroup' for each PDB,
     the headers for those PDBs, and the requested :class:`.Chain` objects
     """
-    
+
     fi = open(filename,'r')
     lines = fi.readlines()
     fi.close()
@@ -1179,7 +1197,7 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
 
     :arg stream: anything that implements a :meth:`write` method (e.g. file,
         buffer, stdout)
-        
+
     :arg renumber: whether to renumber atoms with serial indices
         Default is **True**
     :type renumber: bool
@@ -1343,13 +1361,13 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
                 helix_icodes = icodes[torf]
                 L = helix_resnums[-1] - helix_resnums[0] + 1
 
-                stream.write(HELIXLINE.format(serNum=i, helixID=helix_secids[0], 
-                            initResName=helix_resnames[0], initChainID=helix_chainids[0], 
+                stream.write(HELIXLINE.format(serNum=i, helixID=helix_secids[0],
+                            initResName=helix_resnames[0], initChainID=helix_chainids[0],
                             initSeqNum=helix_resnums[0], initICode=helix_icodes[0],
-                            endResName=helix_resnames[-1], endChainID=helix_chainids[-1], 
+                            endResName=helix_resnames[-1], endChainID=helix_chainids[-1],
                             endSeqNum=helix_resnums[-1], endICode=helix_icodes[-1],
                             helixClass=helix_secclasses[0], length=L))
-        
+
         # write strands
         torf_all_sheets = isSheet(secstrs)
         sheet_secids = secids[torf_all_sheets]
@@ -1368,9 +1386,9 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
                 strand_icodes = icodes[torf_strand]
 
                 stream.write(SHEETLINE.format(strand=i, sheetID=sheet_id, numStrands=numStrands,
-                            initResName=strand_resnames[0], initChainID=strand_chainids[0], 
+                            initResName=strand_resnames[0], initChainID=strand_chainids[0],
                             initSeqNum=strand_resnums[0], initICode=strand_icodes[0],
-                            endResName=strand_resnames[-1], endChainID=strand_chainids[-1], 
+                            endResName=strand_resnames[-1], endChainID=strand_chainids[-1],
                             endSeqNum=strand_resnums[-1], endICode=strand_icodes[-1],
                             sense=strand_secclasses[0]))
         pass
@@ -1393,7 +1411,7 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
             warned_hybrid36 = False
 
         warned_5_digit = False
-            
+
         for i, xyz in enumerate(coords):
             if hybrid36:
                 pdbline = PDBLINE_H36
@@ -1421,7 +1439,7 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
                     pdbline = PDBLINE_GE100K_GE10K
                     anisouline = ANISOULINE_GE100K_GE10K
                     LOGGER.warn('Resnums are exceeding 9999 and hexadecimal format is being used for indices and resnums')
-                
+
                 elif reached_max_n_res and not reached_max_n_atom and (i == MAX_N_ATOM or serials[i] > MAX_N_ATOM):
                     reached_max_n_atom = True
                     pdbline = PDBLINE_GE100K_GE10K
@@ -1439,7 +1457,7 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
                 if len(str(resnum)) == 5:
                     if icodes[i] == '':
                         icodes[i] = str(resnum)[4]
-                        
+
                         if not warned_5_digit:
                             LOGGER.warn('Storing 5-digit resnums using insertion codes')
                             warned_5_digit = True
@@ -1447,21 +1465,21 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
                         LOGGER.warn('Truncating 5-digit resnum as insertion code is busy.')
 
                     resnum = int(str(resnum)[:4])
-                
+
                 elif len(str(resnum)) > 5:
                     if not warned_5_digit:
                         LOGGER.warn('Truncating {0}-digit resnum as too long to be '
                                     'supported by insertion code.'.format(len(str(resnum))))
                         warned_5_digit = True
-                        
+
                     resnum = int(str(resnum)[:4])
             else:
                 final_resnum = '%4x' % int(resnum)
-                
+
                 if len(str(final_resnum)) == 5:
                     if icodes[i] == '':
                         icodes[i] = str(final_resnum)[4]
-                        
+
                         if not warned_5_digit:
                             LOGGER.warn('Storing 5-digit hex resnums using insertion codes')
                             warned_5_digit = True
@@ -1469,16 +1487,16 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
                         LOGGER.warn('Truncating 5-digit hex resnum as insertion code is busy.')
 
                     resnum = int(str(final_resnum)[:4], 16)
-                
+
                 elif len(str(final_resnum)) > 5:
                     if not warned_5_digit:
                         LOGGER.warn('Truncating {0}-digit hex resnum ({1}) as too long to be '
-                                    'supported by insertion code.'.format(len(str(final_resnum)), 
+                                    'supported by insertion code.'.format(len(str(final_resnum)),
                                                                           final_resnum))
                         warned_5_digit = True
-                        
+
                     resnum = int(str(final_resnum)[:4], 16)
-              
+
 
             write(pdbline % (hetero[i], serial,
                              atomnames[i], altlocs[i],
@@ -1511,7 +1529,7 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
 def writePDB(filename, atoms, csets=None, autoext=True, **kwargs):
     """Write *atoms* in PDB format to a file with name *filename* and return
     *filename*.  If *filename* ends with :file:`.gz`, a compressed file will
-    be written.        
+    be written.
 
     :arg renumber: whether to renumber atoms with serial indices
         Default is **True**
@@ -1568,12 +1586,12 @@ def writePQRStream(stream, atoms, **kwargs):
                 length = ss_end - ss_start + 1
 
                 entries = [init.getSecindex(), init.getSecid(),
-                           init.getResname(), init.getChid(), 
+                           init.getResname(), init.getChid(),
                            init.getResnum(), init.getIcode(),
                            end.getResname(), end.getChid(),
                            end.getResnum(), end.getIcode(),
                            init.getSecclass()]
-                
+
                 if ssa[ss_end] == 'H':
                     helix.append(["HELIX "] + entries +
                                  ['', length])
@@ -1607,7 +1625,7 @@ def writePQRStream(stream, atoms, **kwargs):
 
     num_strands = item1[1]
     for item2 in sorted_sheet[i-num_strands+1:]:
-        item2.append(num_strands)    
+        item2.append(num_strands)
 
     format_sheet = ('{0:6s} {1:3d} {2:3s}{12:2d} ' +
                     '{3:3s} {4:1s}{5:4d}{6:1s}' +
@@ -1651,7 +1669,7 @@ def writePQRStream(stream, atoms, **kwargs):
               '{8:8.3f} {9:8.3f} {10:8.3f}' +
               '{11:8.4f} {12:7.4f}\n').format
     coords = atoms._getCoords()
-    
+
     for i, xyz in enumerate(coords):
         write(format(hetero[i], i+1, atomnames[i], altlocs[i],
                      resnames[i], chainids[i], int(resnums[i]),

From 56055ab8114e0452dfe7d6de9fd1415df98957ab Mon Sep 17 00:00:00 2001
From: Zhuoran Qiao <zqiao@caltech.edu>
Date: Tue, 26 Apr 2022 00:15:43 -0700
Subject: [PATCH 2/5] switch back to warning

---
 prody/proteins/pdbfile.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/prody/proteins/pdbfile.py b/prody/proteins/pdbfile.py
index a3bf68c35..140739b89 100644
--- a/prody/proteins/pdbfile.py
+++ b/prody/proteins/pdbfile.py
@@ -326,7 +326,6 @@ def parsePDBStream(stream, **kwargs):
             try:
                 ag.setBonds(bonds)
             except ValueError:
-                raise
                 LOGGER.warn('Bonds read from CONECT records do not apply to subset so were not added')
         if ag.numAtoms() > 0:
             LOGGER.report('{0} atoms and {1} coordinate set(s) were '

From f53d77c877e796a4dbbcef253dcc52cc6597a519 Mon Sep 17 00:00:00 2001
From: Zhuoran Qiao <zqiao@caltech.edu>
Date: Tue, 26 Apr 2022 09:36:49 -0700
Subject: [PATCH 3/5] restore format changes

---
 prody/atomic/atomgroup.py | 22 +++++++++++-----------
 prody/proteins/pdbfile.py | 36 ++++++++++++++++++------------------
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/prody/atomic/atomgroup.py b/prody/atomic/atomgroup.py
index 3f39f06f6..2763d5862 100644
--- a/prody/atomic/atomgroup.py
+++ b/prody/atomic/atomgroup.py
@@ -1095,8 +1095,8 @@ def setBonds(self, bonds):
         index 1"``.  See :mod:`.select` module documentation for details.
         Also, a data array with number of bonds will be generated and stored
         with label *numbonds*.  This can be used in atom selections, e.g.
-        ``'numbonds 0'`` can be used to select ions in a system. If *bonds*
-        is empty or **None**, then all bonds will be removed for this
+        ``'numbonds 0'`` can be used to select ions in a system. If *bonds* 
+        is empty or **None**, then all bonds will be removed for this 
         :class:`.AtomGroup`. """
 
         if bonds is None or len(bonds) == 0:
@@ -1126,7 +1126,7 @@ def setBonds(self, bonds):
         self._fragments = None
 
     def numBonds(self):
-        """Returns number of bonds.  Use :meth:`setBonds` or
+        """Returns number of bonds.  Use :meth:`setBonds` or 
         :meth:`inferBonds` for setting bonds."""
 
         if self._bonds is not None:
@@ -1134,7 +1134,7 @@ def numBonds(self):
         return 0
 
     def getBonds(self):
-        """Returns bonds.  Use :meth:`setBonds` or
+        """Returns bonds.  Use :meth:`setBonds` or 
         :meth:`inferBonds` for setting bonds."""
 
         if self._bonds is not None:
@@ -1171,7 +1171,7 @@ def iterBonds(self):
                 yield Bond(self, bond, acsi)
 
     def _iterBonds(self):
-        """Yield pairs of bonded atom indices. Use :meth:`setBonds`
+        """Yield pairs of bonded atom indices. Use :meth:`setBonds` 
         or `inferBonds` for setting bonds."""
 
         if self._bonds is not None:
@@ -1366,8 +1366,8 @@ def setDonors(self, donors):
         index 1"``.  See :mod:`.select` module documentation for details.
         Also, a data array with number of donors will be generated and stored
         with label *numdonors*.  This can be used in atom selections, e.g.
-        ``'numdonors 0'`` can be used to select ions in a system. If *donors*
-        is empty or **None**, then all donors will be removed for this
+        ``'numdonors 0'`` can be used to select ions in a system. If *donors* 
+        is empty or **None**, then all donors will be removed for this 
         :class:`.AtomGroup`. """
 
         if donors is None or len(donors) == 0:
@@ -1432,8 +1432,8 @@ def setAcceptors(self, acceptors):
         index 1"``.  See :mod:`.select` module documentation for details.
         Also, a data array with number of acceptors will be generated and stored
         with label *numacceptors*.  This can be used in atom selections, e.g.
-        ``'numacceptors 0'`` can be used to select ions in a system. If *acceptors*
-        is empty or **None**, then all acceptors will be removed for this
+        ``'numacceptors 0'`` can be used to select ions in a system. If *acceptors* 
+        is empty or **None**, then all acceptors will be removed for this 
         :class:`.AtomGroup`. """
 
         if acceptors is None or len(acceptors) == 0:
@@ -1498,8 +1498,8 @@ def setNBExclusions(self, nbexclusions):
         index 1"``.  See :mod:`.select` module documentation for details.
         Also, a data array with number of nbexclusions will be generated and stored
         with label *numnbexclusions*.  This can be used in atom selections, e.g.
-        ``'numnbexclusions 0'`` can be used to select ions in a system. If *nbexclusions*
-        is empty or **None**, then all nbexclusions will be removed for this
+        ``'numnbexclusions 0'`` can be used to select ions in a system. If *nbexclusions* 
+        is empty or **None**, then all nbexclusions will be removed for this 
         :class:`.AtomGroup`. """
 
         if nbexclusions is None or len(nbexclusions) == 0:
diff --git a/prody/proteins/pdbfile.py b/prody/proteins/pdbfile.py
index 140739b89..1beaf517f 100644
--- a/prody/proteins/pdbfile.py
+++ b/prody/proteins/pdbfile.py
@@ -26,7 +26,7 @@
            'writePDBStream', 'writePDB', 'writeChainsList', 'writePQR',
            'writePQRStream']
 
-MAX_N_ATOM = 99999
+MAX_N_ATOM = 99999 
 MAX_N_RES = 9999
 
 class PDBParseError(Exception):
@@ -71,9 +71,9 @@ class PDBParseError(Exception):
 
     :arg biomol: if **True**, biomolecules are obtained by transforming the
         coordinates using information from header section will be returned.
-        This option uses :func:`.buildBiomolecules` and as noted there,
-        atoms in biomolecules are ordered according to the original chain
-        IDs. Chains may have the same chain ID, in which case they are given
+        This option uses :func:`.buildBiomolecules` and as noted there, 
+        atoms in biomolecules are ordered according to the original chain 
+        IDs. Chains may have the same chain ID, in which case they are given 
         different segment names.
         Default is **False**
     :type biomol: bool
@@ -99,15 +99,15 @@ def parsePDB(*pdb, **kwargs):
 
     :arg pdb: one PDB identifier or filename, or a list of them.
         If needed, PDB files are downloaded using :func:`.fetchPDB()` function.
-
+    
     You can also provide arguments that you would like passed on to fetchPDB().
 
     :arg extend_biomol: whether to extend the list of results with a list
-        rather than appending, which can create a mixed list,
+        rather than appending, which can create a mixed list, 
         especially when biomol=True.
         Default value is False to reproduce previous behaviour.
         This value is ignored when result is not a list (header=True or model=0).
-    :type extend_biomol: bool
+    :type extend_biomol: bool 
     """
     extend_biomol = kwargs.pop('extend_biomol', False)
 
@@ -139,7 +139,7 @@ def parsePDB(*pdb, **kwargs):
             for key in lstkwargs:
                 kwargs[key] = lstkwargs[key][i]
             c = kwargs.get('chain','')
-            LOGGER.update(i, 'Retrieving {0}...'.format(p+c),
+            LOGGER.update(i, 'Retrieving {0}...'.format(p+c), 
                           label='_prody_parsePDB')
             result = _parsePDB(p, **kwargs)
             if not isinstance(result, tuple):
@@ -257,7 +257,7 @@ def parsePDBStream(stream, **kwargs):
     parsed from a stream of PDB lines.
 
     :arg stream: Anything that implements the method ``readlines``
-        (e.g. :class:`file`, buffer, stdin)"""
+        (e.g. :class:`file`, buffer, stdin)""" 
 
     model = kwargs.get('model')
     header = kwargs.get('header', False)
@@ -1130,7 +1130,7 @@ def _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames):
     :arg hybrid36: whether to use hybrid36 format for atoms with serial
         greater than 99999. Hexadecimal is used otherwise.
         Default is False
-    :type hybrid36: bool
+    :type hybrid36: bool 
     """
 
 def writeChainsList(chains, filename):
@@ -1157,7 +1157,7 @@ def parseChainsList(filename):
     :arg filename: the name of the file to be read
     :type filename: str
 
-    Returns: lists containing an :class:'.AtomGroup' for each PDB,
+    Returns: lists containing an :class:'.AtomGroup' for each PDB, 
     the headers for those PDBs, and the requested :class:`.Chain` objects
     """
 
@@ -1360,10 +1360,10 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
                 helix_icodes = icodes[torf]
                 L = helix_resnums[-1] - helix_resnums[0] + 1
 
-                stream.write(HELIXLINE.format(serNum=i, helixID=helix_secids[0],
-                            initResName=helix_resnames[0], initChainID=helix_chainids[0],
+                stream.write(HELIXLINE.format(serNum=i, helixID=helix_secids[0], 
+                            initResName=helix_resnames[0], initChainID=helix_chainids[0], 
                             initSeqNum=helix_resnums[0], initICode=helix_icodes[0],
-                            endResName=helix_resnames[-1], endChainID=helix_chainids[-1],
+                            endResName=helix_resnames[-1], endChainID=helix_chainids[-1], 
                             endSeqNum=helix_resnums[-1], endICode=helix_icodes[-1],
                             helixClass=helix_secclasses[0], length=L))
 
@@ -1385,9 +1385,9 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
                 strand_icodes = icodes[torf_strand]
 
                 stream.write(SHEETLINE.format(strand=i, sheetID=sheet_id, numStrands=numStrands,
-                            initResName=strand_resnames[0], initChainID=strand_chainids[0],
+                            initResName=strand_resnames[0], initChainID=strand_chainids[0], 
                             initSeqNum=strand_resnums[0], initICode=strand_icodes[0],
-                            endResName=strand_resnames[-1], endChainID=strand_chainids[-1],
+                            endResName=strand_resnames[-1], endChainID=strand_chainids[-1], 
                             endSeqNum=strand_resnums[-1], endICode=strand_icodes[-1],
                             sense=strand_secclasses[0]))
         pass
@@ -1490,7 +1490,7 @@ def writePDBStream(stream, atoms, csets=None, **kwargs):
                 elif len(str(final_resnum)) > 5:
                     if not warned_5_digit:
                         LOGGER.warn('Truncating {0}-digit hex resnum ({1}) as too long to be '
-                                    'supported by insertion code.'.format(len(str(final_resnum)),
+                                    'supported by insertion code.'.format(len(str(final_resnum)), 
                                                                           final_resnum))
                         warned_5_digit = True
 
@@ -1585,7 +1585,7 @@ def writePQRStream(stream, atoms, **kwargs):
                 length = ss_end - ss_start + 1
 
                 entries = [init.getSecindex(), init.getSecid(),
-                           init.getResname(), init.getChid(),
+                           init.getResname(), init.getChid(), 
                            init.getResnum(), init.getIcode(),
                            end.getResname(), end.getChid(),
                            end.getResnum(), end.getIcode(),

From 37749076659ba878732d351f68ff5cd31e6cecca Mon Sep 17 00:00:00 2001
From: Zhuoran Qiao <zqiao@caltech.edu>
Date: Thu, 28 Apr 2022 14:49:53 -0700
Subject: [PATCH 4/5] cleanup and log warning for unsupported mmcif bond
 parsing

---
 prody/proteins/ciffile.py |  4 ++++
 prody/proteins/pdbfile.py | 18 ++++++++----------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/prody/proteins/ciffile.py b/prody/proteins/ciffile.py
index 3d583511a..adeb464f3 100644
--- a/prody/proteins/ciffile.py
+++ b/prody/proteins/ciffile.py
@@ -72,6 +72,10 @@ def parseMMCIF(pdb, **kwargs):
     """
     chain = kwargs.pop('chain', None)
     title = kwargs.get('title', None)
+    auto_bonds = SETTINGS.get('auto_bonds')
+    get_bonds = kwargs.get('bonds', auto_bonds)
+    if get_bonds:
+        LOGGER.warn('Parsing struct_conn information from mmCIF is current unsupported and no bond information is added to the results')
     if not os.path.isfile(pdb):
         if len(pdb) == 5 and pdb.isalnum():
             if chain is None:
diff --git a/prody/proteins/pdbfile.py b/prody/proteins/pdbfile.py
index 1beaf517f..4003c2cad 100644
--- a/prody/proteins/pdbfile.py
+++ b/prody/proteins/pdbfile.py
@@ -209,6 +209,9 @@ def _parsePDB(pdb, **kwargs):
             kwargs['title'] = title
         filename = fetchPDB(pdb, **kwargs)
         if filename is None:
+            # ZQ 04/26: escape from mmcif
+            raise IOError('PDB file for {0} could not be downloaded.'
+                        .format(pdb))
             try:
                 LOGGER.info("Trying to parse mmCIF file instead")
                 return parseMMCIF(pdb+chain, **kwargs)
@@ -759,22 +762,18 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
             if bonds is not None:
                 atom_serial = line[6:11]
                 bonded1_serial = line[11:16]
-                # bonds.append([int(atom_serial)-1-num_ters, int(bonded1_serial)-1-num_ters])
                 bonds_serial.append([int(atom_serial), int(bonded1_serial)])
 
                 bonded2_serial = line[16:21]
                 if len(bonded2_serial.strip()):
-                    # bonds.append([int(atom_serial)-1-num_ters, int(bonded2_serial)-1-num_ters])
                     bonds_serial.append([int(atom_serial), int(bonded2_serial)])
 
                 bonded3_serial = line[21:26]
                 if len(bonded3_serial.strip()):
-                    # bonds.append([int(atom_serial)-1-num_ters, int(bonded3_serial)-1-num_ters])
                     bonds_serial.append([int(atom_serial), int(bonded3_serial)])
 
                 bonded4_serial = line[26:31]  # fixed typo
                 if len(bonded4_serial.strip()):
-                    # bonds.append([int(atom_serial)-1-num_ters, int(bonded4_serial)-1-num_ters])
                     bonds_serial.append([int(atom_serial), int(bonded4_serial)])
 
         elif not onlycoords and (startswith == 'TER   ' or
@@ -982,17 +981,16 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
         charges.resize(acount, refcheck=False)
         atomgroup.setCharges(charges)
 
-    # hard-coded patch
-    serial_to_id = {}
-    for aidx in range(len(serials)):
-        serial_to_id[int(serials[aidx])] = aidx
     # rematch the bond-atom serial numbers to atom ids
+    serial_to_id = {int(serial): aidx for aidx, serial in enumerate(serials)}
     for bond in bonds_serial:
         try:
             bonds.append([serial_to_id[bond[0]], serial_to_id[bond[1]]])
         except KeyError:
-            LOGGER.warn(f"Bond connecting atom serial numbers {bond[0]}"
-                        f" and {bond[1]} contains atoms not included in the model")
+            LOGGER.warn("Bond connecting atom serial numbers {0}"
+                        " and {1} contains atoms not included in the model"
+                        .format(bond[0], bond[1])
+                        )
 
     if altloc and altloc_torf:
         _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames)

From ad13c6834b461ecae9334753ef51f35ec3d1f47d Mon Sep 17 00:00:00 2001
From: Zhuoran Qiao <zqiao@caltech.edu>
Date: Thu, 28 Apr 2022 14:51:58 -0700
Subject: [PATCH 5/5] revert inhouse changes

---
 prody/proteins/pdbfile.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/prody/proteins/pdbfile.py b/prody/proteins/pdbfile.py
index 4003c2cad..d014a21a5 100644
--- a/prody/proteins/pdbfile.py
+++ b/prody/proteins/pdbfile.py
@@ -209,9 +209,6 @@ def _parsePDB(pdb, **kwargs):
             kwargs['title'] = title
         filename = fetchPDB(pdb, **kwargs)
         if filename is None:
-            # ZQ 04/26: escape from mmcif
-            raise IOError('PDB file for {0} could not be downloaded.'
-                        .format(pdb))
             try:
                 LOGGER.info("Trying to parse mmCIF file instead")
                 return parseMMCIF(pdb+chain, **kwargs)