From 7db0cde3cf2889c3541b5451e85ba7cbade5ca00 Mon Sep 17 00:00:00 2001 From: Andrew Jewett Date: Mon, 25 Jan 2021 11:11:18 -0800 Subject: [PATCH] added "extract_line_separation.sh" (not documented yet) and updated the DNA example --- dlpdb/scripts/extract_line_separation.sh | 22 ++++++++++++ .../calc_distances_angles.sh | 36 +++++++++++++++++++ setup.py | 1 + 3 files changed, 59 insertions(+) create mode 100755 dlpdb/scripts/extract_line_separation.sh diff --git a/dlpdb/scripts/extract_line_separation.sh b/dlpdb/scripts/extract_line_separation.sh new file mode 100755 index 0000000..495711b --- /dev/null +++ b/dlpdb/scripts/extract_line_separation.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +BRANCH_OF_LOG="" +if [ "$#" -eq 2 ]; then + BRANCH_OF_LOG="$1" + shift 1 +fi +ATOM_SELECTION="$@" + +if [ -z "${EXTRACTCOORDS}" ]; then + EXTRACTCOORDS="pdb2coords.py -blank" +fi + +while read pdb_file_name; do + echo "${0##*/} processing $pdb_file_name" >&2 + print_coords_command="${EXTRACTCOORDS} ${ATOM_SELECTION} < $pdb_file_name" + #eval "$print_coords_command" | coords2dihedrals.py $BRANCH_OF_LOG + eval "$print_coords_command" | coords2projected_dihedrals.py $BRANCH_OF_LOG | awk '{print $3}' | tr "\n" " " + # You can pipe the results to sed -e 's/\s\+/\n/g' to put on separate lines + echo "" +done + diff --git a/examples/dna_example/statistics_keeping_every_3rd_base_pair/calc_distances_angles.sh b/examples/dna_example/statistics_keeping_every_3rd_base_pair/calc_distances_angles.sh index 6994c40..e10ff67 100755 --- a/examples/dna_example/statistics_keeping_every_3rd_base_pair/calc_distances_angles.sh +++ b/examples/dna_example/statistics_keeping_every_3rd_base_pair/calc_distances_angles.sh @@ -120,6 +120,19 @@ ls -f1 *_12.pdb | extract_dihedrals.sh 180 "\"[0::2]\" \" C3'\" i-6 \" C3'\" i-5 ls -f1 *_12.pdb | extract_dihedrals.sh 180 "\"[0::2]\" \" C3'\" i+6 \" C3'\" i+7 \" C3'\" i+1 \" C3'\"" >> dihedrals_zigzag_torsion_C3p-C3p-C3p-C3p_raw.dat +# Optional new addition (2021-1-24): "Projected Dihedrals" +# These are the torsion angles between successive bases. +# The script that calculates these angles also reports +# the distance between successive bases. +# Together this information can be used to estimate +# the average helical twist per monomer, as well as +# the average distance per monomer. +# (Here 1 "monomer" equals 3 base pairs.) + +ls -f1 *_12.pdb | extract_projected_dihedrals.sh 180 "\"[0::2]\" \" C3'\" i+1 \" C3'\" i+7 \" C3'\" i+6 \" C3'\"" > projected_dihedrals_C3p-C3p-C3p-C3p_raw.dat +ls -f1 *_12.pdb | extract_line_separation.sh 180 "\"[0::2]\" \" C3'\" i+1 \" C3'\" i+7 \" C3'\" i+6 \" C3'\"" > line_separation_C3p-C3p-C3p-C3p_raw.dat + + # Now put all the numbers on separate lines, and throw away # impossible values (large or negative distances or angles) awk '{for (i=1;i<=NF;i++){if ($i>=0.0) print $i}}' < distances_basepairs_C3p-C3p_raw.dat > distances_basepairs_C3p-C3p.dat @@ -140,6 +153,10 @@ awk '{for (i=1;i<=NF;i++){if ($i>=-360) print $i}}' < dihedrals_zigzag_minorgroo awk '{for (i=1;i<=NF;i++){if ($i>=-360) print $i}}' < dihedrals_zigzag_torsion_C3p-C3p-C3p-C3p_raw.dat > dihedrals_zigzag_torsion_C3p-C3p-C3p-C3p.dat +awk '{for (i=1;i<=NF;i++){if ($i>=-360) print $i}}' < projected_dihedrals_C3p-C3p-C3p-C3p_raw.dat > projected_dihedrals_C3p-C3p-C3p-C3p.dat +awk '{for (i=1;i<=NF;i++){if ($i>=0.0) print $i}}' < line_separation_C3p-C3p-C3p-C3p_raw.dat > line_separation_C3p-C3p-C3p-C3p.dat + + # Some nucleotides at the very start and the end of a helix are # arranged differently than the nucleotides in the middle, # therefore, perhaps we should discard them. @@ -188,6 +205,10 @@ awk '{for (i=1;i<=NF;i++){if ($i >=-360) printf "%g ",$i} printf "\n"}' < dihedr +awk '{for (i=1;i<=NF;i++){if ($i >=-360) printf "%g ",$i} printf "\n"}' < projected_dihedrals_C3p-C3p-C3p-C3p_raw.dat | truncate_tokens.py 1 1 | awk '{ for (i=1;i<=NF;i++){print $i}}' > projected_dihedrals_C3p-C3p-C3p-C3p_trunc1-1.dat +awk '{for (i=1;i<=NF;i++){if ($i >=0.0) printf "%g ",$i} printf "\n"}' < line_separation_C3p-C3p-C3p-C3p_raw.dat | truncate_tokens.py 1 1 | awk '{ for (i=1;i<=NF;i++){print $i}}' > line_separation_C3p-C3p-C3p-C3p_trunc1-1.dat + + # --------------------------------------------------------------- # -- Finally, calculate their averages and standard-deviations -- @@ -292,3 +313,18 @@ awk '{for(i=1;i<=NF;++i){if ($i<=0.0){sum+=$i;sumsq+=$i*$i;n++}}} END{print sum/ < dihedrals_zigzag_torsion_C3p-C3p-C3p-C3p_trunc1-1.dat \ > dihedrals_zigzag_torsion_C3p-C3p-C3p-C3p_trunc1-1_ave_dev_n.dat + + +awk '{for(i=1;i<=NF;++i){if ($i>=0.0){sum+=$i;sumsq+=$i*$i;n++}}} END{print sum/n " " sqrt((sumsq/n - (sum/n)*(sum/n))*(n/(n-1))) " " n}' \ + < projected_dihedrals_C3p-C3p-C3p-C3p.dat \ + > projected_dihedrals_C3p-C3p-C3p-C3p_ave_dev_n.dat +awk '{for(i=1;i<=NF;++i){if ($i>=0.0){sum+=$i;sumsq+=$i*$i;n++}}} END{print sum/n " " sqrt((sumsq/n - (sum/n)*(sum/n))*(n/(n-1))) " " n}' \ + < projected_dihedrals_C3p-C3p-C3p-C3p_trunc1-1.dat \ + > projected_dihedrals_C3p-C3p-C3p-C3p_trunc1-1_ave_dev_n.dat + +awk '{for(i=1;i<=NF;++i){if ($i>=0.0){sum+=$i;sumsq+=$i*$i;n++}}} END{print sum/n " " sqrt((sumsq/n - (sum/n)*(sum/n))*(n/(n-1))) " " n}' \ + < line_separation_C3p-C3p-C3p-C3p.dat \ + > line_separation_C3p-C3p-C3p-C3p_ave_dev_n.dat +awk '{for(i=1;i<=NF;++i){if ($i>=0.0){sum+=$i;sumsq+=$i*$i;n++}}} END{print sum/n " " sqrt((sumsq/n - (sum/n)*(sum/n))*(n/(n-1))) " " n}' \ + < line_separation_C3p-C3p-C3p-C3p_trunc1-1.dat \ + > line_separation_C3p-C3p-C3p-C3p_trunc1-1_ave_dev_n.dat diff --git a/setup.py b/setup.py index 6e5362e..9e4d30d 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ scripts=['dlpdb/scripts/extract_angles.sh', 'dlpdb/scripts/extract_dihedrals.sh', 'dlpdb/scripts/extract_projected_dihedrals.sh', + 'dlpdb/scripts/extract_line_separation.sh', 'dlpdb/scripts/extract_distances.sh', 'dlpdb/scripts/extract_helix_angles.sh', 'dlpdb/scripts/extract_helix_dihedrals.sh',