From ea5a721aa3f38305bcc0b236ddedf5704f7d0b24 Mon Sep 17 00:00:00 2001
From: mgiulini <mrcgiulini@gmail.com>
Date: Thu, 29 Feb 2024 10:55:53 +0100
Subject: [PATCH 1/3] updated haddock3 tutorials

---
 .../index.md                                  | 10 +---
 .../HADDOCK3-antibody-antigen/index.md        | 60 +++++++------------
 2 files changed, 22 insertions(+), 48 deletions(-)
diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md b/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md
index 0561eea8..5d92bfbc 100644
--- a/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md
+++ b/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md
@@ -857,10 +857,6 @@ ncores=50
 # Self contained rundir (to avoid problems with long filename paths)
 self_contained = true
 
-# Post-processing to generate statistics and plots
-postprocess = true
-clean = true
-
 molecules =  [
     "pdbs/4G6K_clean.pdb",
     "pdbs/4I1B_clean.pdb"
@@ -1717,7 +1713,7 @@ All three antibody structures used in input give good to reasonable results. The
 
 <hr>
 <hr>
-## BONUS 3: Ensemble-docking using a combination of exprimental and AI-predicted antibody structures
+## BONUS 3: Ensemble-docking using a combination of experimental and AI-predicted antibody structures
 
 
 Instead of running haddock3 using a specific input structure of the antibody we can also use an ensemble of all available models.
@@ -1748,10 +1744,6 @@ ncores=50
 # Self contained rundir (to avoid problems with long filename paths)
 self_contained = true
 
-# Post-processing to generate statistics and plots
-postprocess = true
-clean = true
-
 molecules =  [
     "pdbs/4G6K-ensemble.pdb",
     "pdbs/4I1B_clean.pdb"
diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/index.md b/education/HADDOCK3/HADDOCK3-antibody-antigen/index.md
index 07677fc6..30ae94aa 100644
--- a/education/HADDOCK3/HADDOCK3-antibody-antigen/index.md
+++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/index.md
@@ -261,10 +261,13 @@ all categories and modules. Below is a summary of the available modules:
 * **Analysis modules**
     * `caprieval`: *Calculates CAPRI metrics (i-RMSD, l-RMSD, Fnat, DockQ) with respect to the top scoring model or reference structure if provided.*
     * `clustfcc`: *Clusters models based on the fraction of common contacts (FCC)*
-    * `clustrmsd`: *Clusters models based on pairwise RMSD matrix calculated with the `rmsdmatrix` module.*
     * `rmsdmatrix`: *Calculates the pairwise RMSD matrix between all the models generated in the previous step.*
+    * `ilrmsdmatrix`: *Calculates the pairwise interface-ligand RMSD matrix between all the models generated in the previous step.*
+    * `clustrmsd`: *Clusters models based on the pairwise RMSD matrix calculated with the `rmsdmatrix` or `ilrmsdmatrix` module.*
     * `seletop`: *Selects the top N models from the previous step.*
     * `seletopclusts`: *Selects top N clusters from the previous step.*
+    * `contactmap`: *Generates a contact map for the models generated in the previous step.*
+    * `alascan`: *Performs alanine scanning on the models generated in the previous step.*
 
 The HADDOCK3 workflows are defined in simple configuration text files, similar to the TOML format but with extra features.
 Contrarily to HADDOCK2.X which follows a rigid (yet highly parameterisable)
@@ -629,11 +632,10 @@ binding site in the final models, while the residues defined as `active` (typica
 site residues) will. When using the HADDOCK server, `passive` residues will be automatically defined. Here since we are
 using a local version, we need to define those manually.
 
-This can easily be done using a script from our [haddock-tools][haddock-tools] repository, which is also provided for convenience
-in the `scripts` directly of the archive you downloaded for this tutorial:
+This can easily be done using a command line interface taken from our [haddock-tools][haddock-tools] repository:
 
 <a class="prompt prompt-cmd">
-python ./scripts/passive_from_active.py 4I1B_clean.pdb  72,73,74,75,81,83,84,89,90,92,94,96,97,98,115,116,117
+haddock3-restraints passive_from_active 4I1B_clean.pdb  72,73,74,75,81,83,84,89,90,92,94,96,97,98,115,116,117
 </a>
 
 The NMR-identified residues and their surface neighbors generated with the above command can be used to define ambiguous interactions restraints, either using the NMR identified residues as active in HADDOCK, or combining those with the surface neighbors and use this combination as passive only.
@@ -661,7 +663,7 @@ For this you can either make use of our online [GenTBL][gentbl] web service, ent
 list of active and passive residues for each molecule, and saving the resulting
 restraint list to a text file, or use the relevant `haddock-tools` script.
 
-To use our `haddock-tools` `active-passive-to-ambig.py` script you need to
+To use the `haddock3-restraints active_passive_to_ambig` subcommand you need to
 create for each molecule a file containing two lines:
 
 * The first line corresponds to the list of active residues (numbers separated by spaces)
@@ -683,7 +685,7 @@ Using those two files, we can generate the CNS-formatted AIR restraint files
 with the following command:
 
 <a class="prompt prompt-cmd">
-python ./scripts/active-passive-to-ambig.py ./restraints/antibody-paratope.act-pass ./restraints/antigen-surface.pass > ambig-paratope-surface.tbl
+haddock3-restraints active_passive_to_ambig ./restraints/antibody-paratope.act-pass ./restraints/antigen-surface.pass > ambig-paratope-surface.tbl
 </a>
 
 This generates a file called `ambig-paratope-surface.tbl` that contains the AIR
@@ -697,12 +699,11 @@ distance combinations between an active residue and all the active+passive on
 the other molecule: SUM[1/r^6]^(-1/6).
 
 If you modify manually this file, it is possible to quickly check if the format is valid.
-To do so, you can find in our [haddock-tools][haddock-tools] repository a folder named
-`haddock_tbl_validation` that contains a script called `validate_tbl.py` (also provided here in the `scripts` directory).
+To do so, you can use the `haddock3-restraints validate_tbl` utility.
 To use it, type:
 
 <a class="prompt prompt-cmd">
-python ./scripts/validate_tbl.py \-\-silent ambig-paratope-surface.tbl
+haddock3-restraints validate_tbl \-\-silent ambig-paratope-surface.tbl
 </a>
 
 No output means that your TBL file is valid.
@@ -716,7 +717,7 @@ In this scenario the NMR epitope combined with the surface neighbors are used as
 The creation of the AIR tbl file for scenario 2a is similar to scenario 1, but instead using the `antigen-NMR-epitope.pass` file for the antigen:
 
 <a class="prompt prompt-cmd">
-./scripts/active-passive-to-ambig.py ./restraints/antibody-paratope.act-pass ./restraints/antigen-NMR-epitope.pass > ambig-paratope-NMR-epitope-pass.tbl
+haddock3-restraints active_passive_to_ambig ./restraints/antibody-paratope.act-pass ./restraints/antigen-NMR-epitope.pass > ambig-paratope-NMR-epitope-pass.tbl
 </a>
 
 <hr>
@@ -728,7 +729,7 @@ In this scenario the NMR epitope is defined as active (meaning ambiguous distanc
 The creation of the AIR tbl file for scenario 2b is similar to scenario 1, but instead using the `antigen-NMR-epitope.act-pass` file for the antigen:
 
 <a class="prompt prompt-cmd">
-./scripts/active-passive-to-ambig.py ./restraints/antibody-paratope.act-pass ./restraints/antigen-NMR-epitope.act-pass > ambig-paratope-NMR-epitope.tbl
+haddock3-restraints active_passive_to_ambig ./restraints/antibody-paratope.act-pass ./restraints/antigen-NMR-epitope.act-pass > ambig-paratope-NMR-epitope.tbl
 </a>
 
 <hr>
@@ -737,11 +738,10 @@ The creation of the AIR tbl file for scenario 2b is similar to scenario 1, but i
 
 As an antibody consists of two separate chains, it is important to define a few distance restraints
 to keep them together during the high temperature flexible refinement stage of HADDOCK. This can easily be
-done using a script from [haddock-tools][haddock-tools] repository, which is also provided for convenience
-in the `scripts` directly of the archive you downloaded for this tutorial.
+done using another `haddock3-restraints` subcommand.
 
 <a class="prompt prompt-cmd">
-python ./scripts/restrain_bodies.py  4G6K_clean.pdb >antibody-unambig.tbl
+haddock3-restraints restrain_bodies 4G6K_clean.pdb > antibody-unambig.tbl
 </a>
 
 The result file contains two CA-CA distance restraints with the exact distance measured between the picked CA atoms:
@@ -969,12 +969,6 @@ ncores = 96
 # Self contained rundir (to avoid problems with long filename paths)
 self_contained = true
 
-# Post-processing to generate statistics and plots
-postprocess = true
-
-# Cleaning to compress files and save space
-clean = true
-
 # molecules to be docked
 molecules =  [
     "pdbs/4G6K_clean.pdb",
@@ -997,7 +991,7 @@ randremoval = false
 sampling = 10000
 
 [clustfcc]
-threshold = 10
+min_population = 10
 
 [seletopclusts]
 ## select all the clusters
@@ -1077,12 +1071,6 @@ ncores = 96
 # Self contained rundir (to avoid problems with long filename paths)
 self_contained = true
 
-# Post-processing to generate statistics and plots
-postprocess = true
-
-# Cleaning to compress files and save space
-clean = true
-
 # molecules to be docked
 molecules =  [
     "pdbs/4G6K_clean.pdb",
@@ -1103,7 +1091,7 @@ unambig_fname = "restraints/antibody-unambig.tbl"
 randremoval = false
 
 [clustfcc]
-threshold = 10
+min_population = 10
 
 [seletopclusts]
 ## select all the clusters
@@ -1177,12 +1165,6 @@ ncores = 96
 # Self contained rundir (to avoid problems with long filename paths)
 self_contained = true
 
-# Post-processing to generate statistics and plots
-postprocess = true
-
-# Cleaning to compress files and save space
-clean = true
-
 # molecules to be docked
 molecules =  [
     "pdbs/4G6K_clean.pdb",
@@ -1201,7 +1183,7 @@ ambig_fname = "restraints/ambig-paratope-NMR-epitope-act.tbl"
 unambig_fname = "restraints/antibody-unambig.tbl"
 
 [clustfcc]
-threshold = 10
+min_population = 10
 
 [seletopclusts]
 ## select all the clusters
@@ -1771,7 +1753,7 @@ _**Note**_ that this kind of analysis only makes sense when we know the referenc
 
 <br>
 
-<a class="prompt prompt-question">Is the best model always rank as first?</a>
+<a class="prompt prompt-question">Is the best model always ranked as first?</a>
 
 <details style="background-color:#DAE4E7">
   <summary style="bold">
@@ -1989,7 +1971,7 @@ _**Note**_ that this kind of analysis only makes sense when we know the referenc
 </details>
 <br>
 
-<a class="prompt prompt-question">Is the best model always rank as first?</a>
+<a class="prompt prompt-question">Is the best model always ranked as first?</a>
 
 <details style="background-color:#DAE4E7">
   <summary style="bold">
@@ -2004,7 +1986,7 @@ _**Note**_ that this kind of analysis only makes sense when we know the referenc
 
 #### Analysis scenario 2a: visualizing the scores and their components
 
-By setting `postprocess=true` in the config files, interactive plots have been automatically generated in the _analysis_ directory of the run.
+Since `postprocess=true` is the default setting and is not overridden in the config files, interactive plots have been automatically generated in the _analysis_ directory of the run.
 These are useful to visualise the scores and their components versus ranks and model quality.
 
 <a class="prompt prompt-info">
@@ -2203,7 +2185,7 @@ _**Note**_ that this kind of analysis only makes sense when we know the referenc
 
 <br>
 
-<a class="prompt prompt-question">Is the best model always rank as first?</a>
+<a class="prompt prompt-question">Is the best model always ranked as first?</a>
 
 <details style="background-color:#DAE4E7">
   <summary style="bold">

From f658a6fb49acb43068f53377f2c59727955cc068 Mon Sep 17 00:00:00 2001
From: mgiulini <mrcgiulini@gmail.com>
Date: Wed, 24 Apr 2024 09:54:31 +0200
Subject: [PATCH 2/3] implemented suggestions

---
 .../HADDOCK3/HADDOCK3-antibody-antigen/index.md | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen/index.md b/education/HADDOCK3/HADDOCK3-antibody-antigen/index.md
index 30ae94aa..027878bd 100644
--- a/education/HADDOCK3/HADDOCK3-antibody-antigen/index.md
+++ b/education/HADDOCK3/HADDOCK3-antibody-antigen/index.md
@@ -520,12 +520,10 @@ If you want to generate the same file, first create an empty line and then use t
   awk \'{if (NF==13 && ($7>15 || $9>15)) printf \"\%d \",$3; if (NF==14 && ($8>15 || $10>15)) printf \"\%d \",$4}\' 4I1B_clean.rsa \>\> antigen-surface.pass<br>
 </a>
 
-
-**_Note_**: If the command line version of freesasa is not available, provided the freesasa python libraries have been installed
-(can simply be done with: `pip install freesasa`), the same can be done with the _calc-accessibility.py_ script provided in the `scripts` directory:
+The same can be achieved using the `haddock3-restraints` command line tool:
 
 <a class="prompt prompt-cmd">
-   python ./scripts/calc-accessibility.py \-\-cutoff 0.15 pdbs/4I1B_clean.pdb
+   haddock3-restraints calc_accessibility \-\-cutoff 0.15 pdbs/4I1B_clean.pdb
 </a>
 
 The simple output directly reports the list of residues:
@@ -632,7 +630,7 @@ binding site in the final models, while the residues defined as `active` (typica
 site residues) will. When using the HADDOCK server, `passive` residues will be automatically defined. Here since we are
 using a local version, we need to define those manually.
 
-This can easily be done using a command line interface taken from our [haddock-tools][haddock-tools] repository:
+This can easily be done using the following HADDOCK3 command line interface:
 
 <a class="prompt prompt-cmd">
 haddock3-restraints passive_from_active 4I1B_clean.pdb  72,73,74,75,81,83,84,89,90,92,94,96,97,98,115,116,117
@@ -991,7 +989,8 @@ randremoval = false
 sampling = 10000
 
 [clustfcc]
-min_population = 10
+min_population = 4
+top_models = 10
 
 [seletopclusts]
 ## select all the clusters
@@ -1091,7 +1090,8 @@ unambig_fname = "restraints/antibody-unambig.tbl"
 randremoval = false
 
 [clustfcc]
-min_population = 10
+min_population = 4
+top_models = 10
 
 [seletopclusts]
 ## select all the clusters
@@ -1183,7 +1183,8 @@ ambig_fname = "restraints/ambig-paratope-NMR-epitope-act.tbl"
 unambig_fname = "restraints/antibody-unambig.tbl"
 
 [clustfcc]
-min_population = 10
+min_population = 4
+top_models = 10
 
 [seletopclusts]
 ## select all the clusters

From 3bee078471d8ca8b69960693e30fd5ad90b4e9fa Mon Sep 17 00:00:00 2001
From: mgiulini <mrcgiulini@gmail.com>
Date: Wed, 24 Apr 2024 09:54:43 +0200
Subject: [PATCH 3/3] typo

---
 .../HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md b/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md
index 5d92bfbc..f8f363b5 100644
--- a/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md
+++ b/education/HADDOCK3/HADDOCK3-antibody-antigen-bioexcel2023/index.md
@@ -612,7 +612,7 @@ The difference between `active` and `passive` residues in HADDOCK is:
 *__passive__* residues should be at the interface, if they are not no energetic penalty is paid.
 
 In general it is better to be too generous rather than too strict in the definition of passive residues. An important aspect is to filter both the active (the residues identified from
-your mapping experiment) and passive residues by their solvent accessibility. This is done automatically when using the `haddock3-restraints passive_from_active` command: residues with less that 15% relative solvent accessibility (same cutoff as the default in the HADDOCK server) are discared.
+your mapping experiment) and passive residues by their solvent accessibility. This is done automatically when using the `haddock3-restraints passive_from_active` command: residues with less than 15% relative solvent accessibility (same cutoff as the default in the HADDOCK server) are discarded.
 This is however not a hard limit and you might consider including even more buried residues if some
 important chemical group seems solvent accessible from a visual inspection.