From eed6a30f3bfe56ab6685e9daed04f0aaae7f451b Mon Sep 17 00:00:00 2001 From: Andrew Jewett Date: Sat, 3 Dec 2022 20:14:48 -0800 Subject: [PATCH] better support for LT files not named "system.lt". The "run.in.EXAMPLE" file is renamed in this case to avoid name collisions. The "cleanup_moltemplate.sh" script is more robust and can also handle files with names that do not begin with "system". --- doc/doc_cleanup_moltemplate.md | 35 ++-- moltemplate/scripts/cleanup_moltemplate.sh | 199 +++++++++++++++++---- moltemplate/scripts/moltemplate.sh | 11 +- setup.py | 4 +- 4 files changed, 198 insertions(+), 51 deletions(-) diff --git a/doc/doc_cleanup_moltemplate.md b/doc/doc_cleanup_moltemplate.md index eb841664..607692bb 100644 --- a/doc/doc_cleanup_moltemplate.md +++ b/doc/doc_cleanup_moltemplate.md @@ -15,14 +15,33 @@ For most files, this process is automatic. However sometimes additional manual editing is necessary. Again, be sure to read the [*Limitations*](#Limitations) below. + ## Usage ``` -cleanup_moltemplate.sh +cleanup_moltemplate.sh [-base BASE_NAME] [-ignore-comments] +``` + +## Typical usage +``` +cleanup_moltemplate.sh # (no arguments) ``` -The program will attempt to infer atom type names from comments that -are added to the "Masses" section of the DATA file (which are usually + +## Optional Arguments + +### -base BASE_NAME +By default, this script assumes that your LAMMPS DATA file is named +"system.data", and your LAMMPS INPUT scripts have names beginning +with "system.in.". +If these files begin with a different string (eg \"BASE_NAME\"), +then you run cleanup_moltemplate.sh with the "-base BASE_NAME" argument +*(Note that the beginning of all of these file names must agree.)* + +### -ignore-comments + +By default, this program will attempt to infer atom type names from comments +that are added to the "Masses" section of the DATA file (which are usually automatically generated by moltemplate.sh by default). 
This might cause problems if you have placed your own comments in the "Data Masses" section of your LT files because spaces in atom type names @@ -34,17 +53,13 @@ which will replace all of the atom type names with "type1", "type2", ... cleanup_moltemplate.sh -ignore-comments ``` -### Limitations - -This script assumes that your LAMMPS DATA file is named "system.data", and -your LAMMPS INPUT scripts have names beginning with "system.in.". -If this is not the case, you *must* rename them before running this script. +## Limitations -*(If there are multiple input scripts, then the input script which specifies +If there are multiple input scripts, then the input script which specifies the atom_style, pair_style, bond_style, angle_style, dihedral_style, and improper_style (if applicable) should appear first when listed alphabetically. IE. The name of that file must be earlier than the -names of the other files, lexicographically.)* +names of the other files, lexicographically. This program uses "ltemplify.py" to parse these files and discard unused atom types. Consequently is subject to the same limitations that diff --git a/moltemplate/scripts/cleanup_moltemplate.sh b/moltemplate/scripts/cleanup_moltemplate.sh index 512c51e6..675ea883 100755 --- a/moltemplate/scripts/cleanup_moltemplate.sh +++ b/moltemplate/scripts/cleanup_moltemplate.sh @@ -25,40 +25,161 @@ # significantly more memory if n is large. For example, the "oplsaa.lt" file # and "oplsaa.prm" (TINKER-format) file both define almost 1000 atom types.) # -# Usage: Invoke this script with no arguments, from a directory -# containing these files: +# Usage: +# cleanup_moltemplate [-base BASE_FILE_NAME] +# +# Invoke this script from a directory containing these files: # system.data, system.in.init, system.in.settings, system.in.charges -# It will modify these files to remove unnecessary atoms and -# parameters. 
(If your files have other names, you must rename +# (If your files don't begin with the name "system", you can pass +# the "-base" argument to select files with a different base name.) +# +# Cleanup_moltemplate.sh will modify these files to remove unnecessary +# atoms and parameters. (If your files have other names, you must rename # them to match moltemplate file name conventions.) # # DO NOT USE THIS SCRIPT ON SIMULATIONS CONTAINING MANY-BODY PAIR STYLES, # DREIDING-STYLE HYDROGEN BONDS, OR SIMS NEEDING NON-STANDARD AUXILIARY FILES. # (This script relies on ltemplify.py and inherits its limitations.) -if [ ! -f system.data ] || [ ! -f system.in.init ] || [ ! -f system.in.settings ]; + + +if which python3 > /dev/null; then + PYTHON_COMMAND='python3' +elif which python > /dev/null; then + PYTHON_COMMAND='python' +elif which python2 > /dev/null; then + PYTHON_COMMAND='python2' +fi + +# Determine the directory in which the python scripts are located. +# (such as ltemplify.py). It could either be the directory where the script +# file is located, OR it could be the parent of this directory. +PY_SCR_DIR=`dirname "$0"` +if [ ! -s "${PY_SCR_DIR}/ltemplify.py" ]; then + PY_SCR_DIR="$PY_SCR_DIR/.." +fi + + +BASE_NAME="system" +LTEMPLIFY_ARGS="" +# Store the list of arguments in ARGV, and count them in ARGC +ARGC=0 +for A in "$@"; do + A_FIRSTCHAR="$(echo $A| cut -c 1)" + # (Note to self: this next line only works in bash, not classic sh.) 
+ if [ "$A_FIRSTCHAR" = "\$" ]; then + A="\\$A" # put an extra slash in front to prevent expansion later + fi + ARGC=$((ARGC+1)) + eval ARGV${ARGC}=\"$A\" +done + + +i=0 +while [ "$i" -lt "$ARGC" ]; do + i=$((i+1)) + eval A=\${ARGV${i}} + + if [ "$A" = "-base" ]; then + # The next argument is the base name (or DATA file name) of the files to clean up: + if [ "$i" -eq "$ARGC" ]; then + echo "ERROR cleanup_moltemplate.sh: base file name expected following -base argument" >&2 + exit 7 + fi + i=$((i+1)) + eval A=\${ARGV${i}} + FILE_NAME=$A + # strip off the ".data" suffix (if present) + BASE_NAME=`basename "$FILE_NAME" ".data"` + # strip off the ".lmpdat" suffix (if present) + BASE_NAME=`basename "$BASE_NAME" ".lmpdat"` + + #else: If the arguments are not understood in this script, then + # pass them on to "ltemplify.py" + else + A_FIRSTCHAR="$(echo $A| cut -c 1)" + + if [ "$A_FIRSTCHAR" = "\$" ]; then + A="\\$A" # put an extra slash in front to prevent expansion later + fi + + if [ -z "$LTEMPLIFY_ARGS" ]; then + LTEMPLIFY_ARGS="$A" + else + LTEMPLIFY_ARGS="${LTEMPLIFY_ARGS} $A" + fi + # Check to see if this string ($A) ends in .lt or .LT + # If so, then set the base name of the output files + # to equal the base name of the .LT file being read. + # (Being careful here. + # Sometimes the last argument is not the .lt or .LT file. + # Sometimes that file appears earlier in the argument list. + # I want to supply a default value.) + # + # Note, in bash you can use: + # if [ "${LAST_ARG/%.lt/}" != "$LAST_ARG" ]; then + # OUT_FILE_BASE="${LAST_ARG/%.lt/}" + # But in the original Bourne shell (sh), this does not work. + # Instead we use a hack involving basename and dirname: + + if [ "$A_FIRSTCHAR" != "-" ]; then + DN=`dirname "$A"` + if [ "$DN" = "." 
]; then + DN="" + else + DN="${DN}/" + fi + + BN=`basename "$A" .lt` + if [ "${DN}${BN}" != "$A" ]; then + OUT_FILE_BASE="$BN" + else + BN=`basename "$A" .LT` + if [ "${DN}${BN}" != "$A" ]; then + OUT_FILE_BASE="$BN" + fi + fi + fi + fi +done + + + +if [ ! -f "${BASE_NAME}.data" ] || [ ! -f "${BASE_NAME}.in.init" ] || [ ! -f "${BASE_NAME}.in.settings" ]; then echo "============================ ERROR ==============================" >&2 echo "The following files must be exist for this script to work" >&2 - echo " system.data, system.in.init, system.in.settings" >&2 + echo " ${BASE_NAME}.data, ${BASE_NAME}.in.init, ${BASE_NAME}.in.settings" >&2 echo "This script assumes that the files you created with moltemplate begin" >&2 - echo "with \"system\". If those files are absent, it means that you did not run" >&2 - echo " moltemplate.sh system.lt [...]" >&2 - echo "This happens if you named your \"system.lt\" file something else." >&2 - echo "To get around this error, change your main .lt file to \"system.lt\"," >&2 - echo "and try running moltemplate.sh again. Alternatively, you can try" >&2 - echo "renaming ALL of the generated files (including the files ending in:" >&2 + echo "with \"${BASE_NAME}\". If those files are absent, it means that you did not run" >&2 + echo " moltemplate.sh ${BASE_NAME}.lt [...]" >&2 + echo "This usually happens if you named your \"${BASE_NAME}.lt\" file to something else." >&2 + echo "" >&2 + if [ "${BASE_NAME}" == "system" ]; then + echo "If these files begin with a different string (eg \"BASE_NAME\"), then try" >&2 + echo "running cleanup_moltemplate.sh with the \"-base BASE_NAME\" argument." >&2 + echo "(Note that the beginning of all of these file names must agree.)" >&2 + else + echo "Try changing your main .lt file to \"${BASE_NAME}.lt\"," >&2 + echo "and try running moltemplate.sh again." 
>&2 + fi + echo "" >&2 + echo "Alternatively, you can try renaming ALL of the generated files" >&2 + echo "(including the files ending in:" >&2 echo "\".data\", \".in.init\", \".in.settings\", and \".in.charges\" if present)" >&2 echo " to:" >&2 - echo "system.data, system.in.init, system.in.settings (system.in.charges if present)" >&2 + echo "${BASE_NAME}.data, ${BASE_NAME}.in.init, ${BASE_NAME}.in.settings (${BASE_NAME}.in.charges if present)" >&2 echo "================================================================" >&2 exit 1 fi + + PATH_TO_DATA_FILE="." pushd "$PATH_TO_DATA_FILE" +rm -rf new_lt_file_TMP mkdir new_lt_file_TMP cd new_lt_file_TMP @@ -70,9 +191,12 @@ cd new_lt_file_TMP # from comments in the "Masses" section of the DATA file. Such comments # might contain spaces or special characters which we want to avoid.) - ltemplify.py "$@" ../system.in.* ../system.data > system.lt + if ! $PYTHON_COMMAND "${PY_SCR_DIR}/ltemplify.py" $LTEMPLIFY_ARGS "../${BASE_NAME}.in".* "../${BASE_NAME}.data" > "${BASE_NAME}.lt"; then + echo "ERROR: ltemplify.py failed to parse your files." >&2 + exit 1 + fi - # This creates a new .LT file named "system.lt" in the local directory. + # This creates a new .LT file named ${BASE_NAME}.lt in the local directory. # The ltemplify.py script also does not copy the boundary dimensions. # We must do this manually. @@ -80,7 +204,7 @@ cd new_lt_file_TMP # until you have # If you did NOT throw away the "Data Boundary" file usually located in # "moltemplate_files/output_ttree/Data Boundary" - # then you can copy that information from this file into system.lt + # then you can copy that information from this file into ${BASE_NAME}.lt # oops. 
looks like we don't need this after all @@ -90,43 +214,46 @@ cd new_lt_file_TMP # awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }' #} - echo "" >> system.lt - echo "write_once(\"Data Boundary\") {" >> system.lt + echo "" >> "${BASE_NAME}.lt" + echo "write_once(\"Data Boundary\") {" >> "${BASE_NAME}.lt" # Extract the periodic boundary box dimensions from the # end of the header section of the LAMMPS data file: - extract_lammps_data.py Header < ../system.data | awk '{if (($3=="xlo") && ($4=="xhi")) {xl=$0} if (($3=="ylo") && ($4=="yhi")) {yl=$0} if (($3=="zlo") && ($4=="zhi")) {zl=$0} if (($4=="xy") && ($5=="xz") && ($6=="yz")) {xtr=$0}} END{print xl; print yl; print zl; if (xtr!="") {print xtr}}' >> system.lt - echo "}" >> system.lt - echo "" >> system.lt + $PYTHON_COMMAND "${PY_SCR_DIR}/extract_lammps_data.py" Header < "../${BASE_NAME}.data" | awk '{if (($3=="xlo") && ($4=="xhi")) {xl=$0} if (($3=="ylo") && ($4=="yhi")) {yl=$0} if (($3=="zlo") && ($4=="zhi")) {zl=$0} if (($4=="xy") && ($5=="xz") && ($6=="yz")) {xtr=$0}} END{print xl; print yl; print zl; if (xtr!="") {print xtr}}' >> "${BASE_NAME}.lt" + echo "}" >> "${BASE_NAME}.lt" + echo "" >> "${BASE_NAME}.lt" # Now, run moltemplate on this new .LT file. - # Interpret the system.lt literally. Don't check for duplicates("-overlay...") - moltemplate.sh system.lt \ - -overlay-bonds -overlay-angles \ - -overlay-dihedrals -overlay-impropers - # This will create: "system.data" "system.in.init" "system.in.settings." + # Interpret ${BASE_NAME}.lt literally. Don't check for duplicates("-overlay...") + if ! "${PY_SCR_DIR}/scripts/moltemplate.sh" "${BASE_NAME}.lt" \ + -overlay-bonds -overlay-angles \ + -overlay-dihedrals -overlay-impropers; then + echo "ERROR: cleanup_moltemplate.sh: unable to convert the simplified LT file to LAMMPS files" >&2 + exit 1 + fi + # This will create: ${BASE_NAME}.data, ${BASE_NAME}.in.init, ${BASE_NAME}.in.settings - # That's it. 
The new "system.data" and "system.in.settings" files should + # That's it. The new ${BASE_NAME}.data and ${BASE_NAME}.in.settings files should # be ready to run in LAMMPS. # Special case: "set" commands # Typically "set type" or "set atom" commands are used to assign atom charge - # If there is a "system.in.charges" file, then it contains these commands + # If there is a ${BASE_NAME}.in.charges file, then it contains these commands # however the atom type numbers will be wrong, so we must rewrite it. - # Replace it with the corresponding commands from the system.in.settings + # Replace it with the corresponding commands from the ${BASE_NAME}.in.settings # (whose atom type numbers are correct) - if [ -f "../system.in.charges" ]; then + if [ -f "../${BASE_NAME}.in.charges" ]; then awk '{ if ((NF >= 5) && ($1 == "set") && ($4 == "charge")) print $0}' \ - < system.in.settings > system.in.charges - # There is no need to remove these lines from "system.in.settings", + < "${BASE_NAME}.in.settings" > "${BASE_NAME}.in.charges" + # There is no need to remove these lines from "${BASE_NAME}.in.settings", # (because there's no harm to invoke the "set" command twice) # ...but if you want to do that, try using a command similar to: - #sed '/set type/,+1 d' < system.in.settings > system.in.settings.tmp - #mv -f system.in.settings.tmp system.in.settings + #sed '/set type/,+1 d' < "${BASE_NAME}.in.settings" > "${BASE_NAME}.in.settings.tmp" + #mv -f "${BASE_NAME}.in.settings.tmp" "${BASE_NAME}.in.settings" fi - # Now move the system.data and system.in.* files to their original location: - mv -f system.data system.in.* ../ + # Now move the ${BASE_NAME}.data and ${BASE_NAME}.in.* files to their original location: + mv -f "${BASE_NAME}.data" "${BASE_NAME}.in".* ../ cd ../ # Finally, delete all of the temporary files we generated diff --git a/moltemplate/scripts/moltemplate.sh b/moltemplate/scripts/moltemplate.sh index 1db67349..b7e88c21 100755 --- a/moltemplate/scripts/moltemplate.sh 
+++ b/moltemplate/scripts/moltemplate.sh @@ -6,8 +6,8 @@ # Copyright (c) 2013 G_PROGRAM_NAME="moltemplate.sh" -G_VERSION="2.20.15" -G_DATE="2022-10-30" +G_VERSION="2.20.16" +G_DATE="2022-12-03" echo "${G_PROGRAM_NAME} v${G_VERSION} ${G_DATE}" >&2 echo "" >&2 @@ -837,7 +837,12 @@ fi -OUT_FILE_EXAMPLE_SCRIPT="run.in.EXAMPLE" +OUT_FILE_EXAMPLE_SCRIPT="run.in.EXAMPLE" # default LAMMPS input script example +if [ "$OUT_FILE_BASE" != "system" ]; then + # For users who choose custom .LT file names, + # the files that moltemplate creates should have custom names also: + OUT_FILE_EXAMPLE_SCRIPT="run.in.EXAMPLE.${OUT_FILE_BASE}" +fi OUT_FILE_INPUT_SCRIPT="${OUT_FILE_BASE}.in" OUT_FILE_INIT="${OUT_FILE_BASE}.in.init" OUT_FILE_SETTINGS="${OUT_FILE_BASE}.in.settings" diff --git a/setup.py b/setup.py index 0275dba9..1d28a487 100644 --- a/setup.py +++ b/setup.py @@ -45,9 +45,9 @@ url='https://github.com/jewettaij/moltemplate', - download_url='https://github.com/jewettaij/moltemplate/archive/v2.20.15.zip', + download_url='https://github.com/jewettaij/moltemplate/archive/v2.20.16.zip', - version='2.20.15', + version='2.20.16', keywords=['simulation', 'LAMMPS', 'molecule editor', 'molecule builder', 'ESPResSo'],