From b1ba71f0b3b786f9505cfc21deb8056ee7d2179a Mon Sep 17 00:00:00 2001
From: Andrew Jewett <jewett.aij@gmail.com>
Date: Tue, 7 Jul 2020 22:08:42 -0700
Subject: [PATCH] added the ability to use regular expressions to match @atom
 type names appearing in pair_coeff commands.  Regular expressions can also
 match @bond, @angle, @dihedral, and @improper type names when they appear in
 bond_coeff, angle_coeff, dihedral_coeff, and improper_coeff commands.  None
 of this new functionality has been tested yet.  Hopefully I did not break
 anything.

---
 .../moltemplate_manual.tex                    |  25 ++---
 .../moltemplate_files/nanotube.lt             |   4 +-
 .../moltemplate_files/water_box.lt            |   2 +-
 moltemplate/nbody_by_type.py                  |   2 +-
 moltemplate/postprocess_coeffs.py             | 102 +++++++++++++-----
 5 files changed, 94 insertions(+), 41 deletions(-)

diff --git a/doc/moltemplate_manual_src/moltemplate_manual.tex b/doc/moltemplate_manual_src/moltemplate_manual.tex
index 9f657028..5c5134f4 100644
--- a/doc/moltemplate_manual_src/moltemplate_manual.tex
+++ b/doc/moltemplate_manual_src/moltemplate_manual.tex
@@ -577,8 +577,8 @@ \subsection{Moltemplate commands}
 commands to define Atoms, Bonds, Angles, Coeffs, etc...
 \textit{(If that molecule type exists already, 
 then this will append additional \textbf{content} to its definition.)}
-\textbf{new} and \textbf{delete} commands can be used 
-to create or delete molecular subunits \textit{within} this molecule.
+%\textbf{new} and \textbf{delete} commands can be used 
+%to create or delete molecular subunits \textit{within} this molecule.
 (See the \textit{SPCE}, \textit{Monomer}, and \textit{Butane} 
  molecules, and the \textit{TraPPE} namespace 
  defined in sections \ref{sec:spce_example}, \ref{sec:2bead},
@@ -675,10 +675,11 @@ \subsection{Moltemplate commands}
 \} \\
 \end{tabular}
 &
-Create a new molecule \textbf{type} based on multiple existing molecule types.
+Define a new object \textbf{type}
+based on one or more existing object types.
 Atom types, bond types, angle types (etc) which are defined in
 \textit{Mol1}, or \textit{Mol2}, ... are available inside the
-new molecule.
+new object.
 \textit{Additional content} 
 (including more \textit{write()} or \textit{write\_once()}
 or \textit{new} commands)
@@ -715,10 +716,10 @@ \subsection{Moltemplate commands}
 \textbf{delete} \textit{molecule}
 &
 Delete the \textit{molecule} instance.
-(This command can appear inside a molecule's definition 
- to delete a specific molecular subunit within a molecule.  In that case,
- it will be carried out in every copy of that molecule type.
- \textbf{delete} can also be used to delete specific
+%(This command can appear inside a molecule's definition 
+% to delete a specific molecular subunit within a molecule.  In that case,
+% it will be carried out in every copy of that molecule type.)
+(\textbf{delete} can also be used to delete 
  atoms, bonds, angles, dihedrals, and improper interactions.)
 See section \ref{sec:delete}.
 \\
@@ -726,7 +727,7 @@ \subsection{Moltemplate commands}
 \textbf{delete} \textit{molecules}[\textit{range}]
 &
 Delete a range of molecules specified by 
-\mbox{\textit{molecule}[\textit{range}]}.
+\mbox{\textit{molecules}[\textit{range}]}.
 (This also works for multidimensional arrays.
  See sections \ref{sec:delete} and \ref{sec:delete_holes}.)
 \\
@@ -774,7 +775,7 @@ \subsection{Moltemplate commands}
 \textbf{import} \textit{file}
 &
 Insert the contents of file \textit{file} here,
-preventing circular inclusions.
+preventing circular (self-referential) inclusions.
 \textit{(recommended)}
 \\
 \hline
@@ -1039,7 +1040,7 @@ \subsection{Coordinate transformations}
 \mbox{$x_0,y_0,z_0$} which specify the point around which
 you want the scaling to occur.
 (This point will be a \textit{fixed point}.
- Of omitted, the origin is used.)
+ If omitted, the origin is used.)
 \\
 \hline
 \begin{tabular}[t]{l}
@@ -4418,7 +4419,7 @@ \subsection*{Regular expressions}
 Regular-expressions can also be used to match potential atom and bond types.
 (To use regular expressions, surround the atom and 
 bond types on either side by slashes.  
-For example: \mbox{@atom:C[1-5]/}, should match 
+For example: \mbox{/@atom:C[1-6]/}, should match 
 \mbox{@atom:C1} through \mbox{@atom:C6}.)
 \textit{Note: This feature has not been tested as of 2019-9-03.}
 
diff --git a/examples/all_atom/force_field_explicit_parameters/nanotube+water/moltemplate_files/nanotube.lt b/examples/all_atom/force_field_explicit_parameters/nanotube+water/moltemplate_files/nanotube.lt
index 06a5e943..20a9bc7f 100644
--- a/examples/all_atom/force_field_explicit_parameters/nanotube+water/moltemplate_files/nanotube.lt
+++ b/examples/all_atom/force_field_explicit_parameters/nanotube+water/moltemplate_files/nanotube.lt
@@ -23,8 +23,8 @@ nanotube = new GrapheneXZ.move(0, 5.457193512764, 0)            # 5.45 = R
                                                                 #2.13= d*1.5
 
 
-                  # Note: The length is 12 hexegons, the circumference is
-                  # 14 hexegons (~=25.56 and 34.43 Angstroms, respectively).
+                  # Note: The length is 12 hexagons, the circumference is
+                  # 14 hexagons (~=25.56 and 34.43 Angstroms, respectively).
 
 
 # Move all of the unit-cells in the nanotube between the two graphene sheets.
diff --git a/examples/all_atom/force_field_explicit_parameters/nanotube+water/moltemplate_files/water_box.lt b/examples/all_atom/force_field_explicit_parameters/nanotube+water/moltemplate_files/water_box.lt
index c825276f..d4a8f33c 100644
--- a/examples/all_atom/force_field_explicit_parameters/nanotube+water/moltemplate_files/water_box.lt
+++ b/examples/all_atom/force_field_explicit_parameters/nanotube+water/moltemplate_files/water_box.lt
@@ -8,7 +8,7 @@ wat  =  new SPCE  [9].move(3.5526287,    0,      0 )
                   [9].move(1.77631435, 3.3133,   0 )
                   [6].move(   0,         0,    3.45)
 
-# Optional: Center the water box at the origin. (Not really necessary.)
+# Optional: Move the water below the graphene sheets
 
 wat[*][*][*].move(-23.9802437, -14.90985, 11.47)
 
diff --git a/moltemplate/nbody_by_type.py b/moltemplate/nbody_by_type.py
index 5aaee0aa..9ed3d85e 100755
--- a/moltemplate/nbody_by_type.py
+++ b/moltemplate/nbody_by_type.py
@@ -3,7 +3,7 @@
 # Author: Andrew Jewett (jewett.aij at g mail)
 #         http://www.chem.ucsb.edu/~sheagroup
 # License: 3-clause BSD License  (See LICENSE.TXT)
-# Copyright (c) 2011, Regents of the University of California
+# Copyright (c) 2013, Regents of the University of California
 # All rights reserved.
 
 man_page_text = """
diff --git a/moltemplate/postprocess_coeffs.py b/moltemplate/postprocess_coeffs.py
index d251bc6f..e4f6830b 100755
--- a/moltemplate/postprocess_coeffs.py
+++ b/moltemplate/postprocess_coeffs.py
@@ -42,6 +42,7 @@
 
 
 import sys
+import re
 import gc
 
 try:
@@ -58,8 +59,8 @@
 g_module_name = g_filename
 if g_filename.rfind('.py') != -1:
     g_module_name = g_filename[:g_filename.rfind('.py')]
-g_date_str = '2018-6-09'
-g_version_str = '0.2.0'
+g_date_str = '2020-7-07'
+g_version_str = '0.3.0'
 g_program_name = g_filename
 #sys.stderr.write(g_program_name+' v'+g_version_str+' '+g_date_str+' ')
 
@@ -156,13 +157,27 @@ def main():
             if (not line_orig) or (line_orig == ''):
                 break
             tokens = line_orig.strip().split('@')
+            # If the second token is surrounded by '/' characters, interpret
+            # it as a regular expression.
+            token1_is_re = ((len(tokens) >= 2) and
+                            (len(tokens[1]) >= 2) and
+                            (tokens[1][0] == '/') and (tokens[1][-1] == '/'))
+            # If the second token contains wildcard characters, interpret
+            # it as a wildcard (ie. glob) expression.
+            token1_is_wild = ((len(tokens) >= 2) and
+                              HasWildcard(tokens[1]) #does it contain '*' or '?'
+                              and 
+                              (tokens[1][0] != '{')) #(ignore * or ? in {})
 
             if ((len(tokens) >= 2) and
                 (tokens[0].find('bond_coeff') == 0) and
-                #does this token contain '*' or '?'
-                HasWildcard(tokens[1]) and
-                (tokens[1][0:1] != '{')):  #(don't pattern match * in {})?'
-                left_paren, typepattern, text_after = ExtractVarName(tokens[1])
+                (token1_is_re or token1_is_wild)):
+                if token1_is_re:
+                    regex_str = typestr[1:-1]
+                    left_paren = text_after = ''
+                    typepattern = re.compile(regex_str)
+                else:
+                    left_paren, typepattern, text_after = ExtractVarName(tokens[1])
                 for btype in bond_types:
                     if MatchesPattern(btype, typepattern):
                         #assert(left_paren == '')
@@ -170,11 +185,14 @@ def main():
                         sys.stdout.write('@'.join(tokens) + '\n')
 
             elif ((len(tokens) >= 2) and
-                  (tokens[0].find('angle_coeff') == 0) and
-                  #does this token contain '*' or '?'
-                  HasWildcard(tokens[1]) and
-                  (tokens[1][0:1] != '{')):
-                left_paren, typepattern, text_after = ExtractVarName(tokens[1])
+                (tokens[0].find('angle_coeff') == 0) and
+                (token1_is_re or token1_is_wild)):
+                if token1_is_re:
+                    regex_str = typestr[1:-1]
+                    left_paren = text_after = ''
+                    typepattern = re.compile(regex_str)
+                else:
+                    left_paren, typepattern, text_after = ExtractVarName(tokens[1])
                 for antype in angle_types:
                     if MatchesPattern(antype, typepattern):
                         #assert(left_paren == '')
@@ -182,11 +200,14 @@ def main():
                         sys.stdout.write('@'.join(tokens) + '\n')
 
             elif ((len(tokens) >= 2) and
-                  (tokens[0].find('dihedral_coeff') == 0) and
-                  #does this token contain '*' or '?'
-                  HasWildcard(tokens[1]) and
-                  (tokens[1][0:1] != '{')):  #(don't pattern match * in {})
-                left_paren, typepattern, text_after = ExtractVarName(tokens[1])
+                (tokens[0].find('dihedral_coeff') == 0) and
+                (token1_is_re or token1_is_wild)):
+                if token1_is_re:
+                    regex_str = typestr[1:-1]
+                    left_paren = text_after = ''
+                    typepattern = re.compile(regex_str)
+                else:
+                    left_paren, typepattern, text_after = ExtractVarName(tokens[1])
                 for dtype in dihedral_types:
                     if MatchesPattern(dtype, typepattern):
                         #assert(left_paren == '')
@@ -194,11 +215,14 @@ def main():
                         sys.stdout.write('@'.join(tokens) + '\n')
 
             elif ((len(tokens) >= 2) and
-                  (tokens[0].find('improper_coeff') == 0) and
-                  #does this token contain '*' or '?'
-                  HasWildcard(tokens[1]) and
-                  (tokens[1][0:1] != '{')):  #(don't pattern match * in {})
-                left_paren, typepattern, text_after = ExtractVarName(tokens[1])
+                (tokens[0].find('improper_coeff') == 0) and
+                (token1_is_re or token1_is_wild)):
+                if token1_is_re:
+                    regex_str = typestr[1:-1]
+                    left_paren = text_after = ''
+                    typepattern = re.compile(regex_str)
+                else:
+                    left_paren, typepattern, text_after = ExtractVarName(tokens[1])
                 for itype in improper_types:
                     if MatchesPattern(itype, typepattern):
                         #assert(left_paren == '')
@@ -258,16 +282,44 @@ def main():
                     text_after2 = ''
                     typepattern2 = tokens[2]
                     del tokens[3]
-                if (HasWildcard(tokens[1]) and
-                    (tokens[1][0:1] != '{')):  #(don't pattern match * in {})
+
+                ################
+                # If surrounded by '/' characters, the token is meant to be
+                # interpreted as a regular expression.
+                token1_is_re = ((len(tokens[1]) >= 2) and
+                                (tokens[1][0] == '/') and
+                                (tokens[1][-1] == '/'))
+                token2_is_re = ((len(tokens[2]) >= 2) and
+                                (tokens[2][0] == '/') and
+                                (tokens[2][-1] == '/'))
+                # If the token contains wildcard characters, interpret
+                # it as a wildcard (ie. glob) expression.
+                token1_is_wild = (HasWildcard(tokens[1])   #contain '*' or '?'
+                                  and (tokens[1][0] != '{'))  #ignore * in {}
+                token2_is_wild = (HasWildcard(tokens[2])   #contain '*' or '?'
+                                  and (tokens[2][0] != '{'))  #ignore * in {}
+                ################
+
+
+                if token1_is_re:
+                    atom_types1 = atom_types
+                    regex_str = tokens[1][1:-1]
+                    typepattern1 = re.compile(regex_str)
+                elif token1_is_wild:
                     atom_types1 = atom_types
                 else:
                     atom_types1 = set([typepattern1])
-                if (HasWildcard(tokens[2]) and
-                    (tokens[2][0:1] != '{')):  #(don't pattern match * in {})
+
+                if token2_is_re:
+                    atom_types2 = atom_types
+                    regex_str = tokens[2][1:-1]
+                    typepattern2 = re.compile(regex_str)
+                elif token2_is_wild:
                     atom_types2 = atom_types
                 else:
                     atom_types2 = set([typepattern2])
+
+
                 for atype1 in atom_types1:
                     #sys.stderr.write('atype1 = \"'+str(atype1)+'\"\n')
                     if MatchesPattern(atype1, typepattern1):