From ddc59c29f12b28ed0c92eb9f7a88aac011ec0b4b Mon Sep 17 00:00:00 2001
From: Lorenzo Rovigatti <lorenzo.rovigatti@gmail.com>
Date: Mon, 9 Sep 2024 11:21:28 +0200
Subject: [PATCH 1/4] Add some documentation as a follow up to #122

---
 docs/source/install.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/install.md b/docs/source/install.md
index c5afb3e4..6c253077 100644
--- a/docs/source/install.md
+++ b/docs/source/install.md
@@ -39,7 +39,7 @@ cd build        # enter the build folder (see above)
 make -j4        # compile the updated source
 ```
 
-If you also want to update `oxpy` don't forget to run `make install` after the compilation.
+If you also want to update `oxpy` and `OAT`, don't forget to run `make install` after the compilation.
 
 ### CMake options
 
@@ -104,6 +104,7 @@ cmake -DPython=1 -DPYTHON_EXECUTABLE=$HOME/miniconda3/bin/python -DPYTHON_INCLUD
 ## Known issues
 
 * An `illegal instruction` is sometimes issued when the code is compiled on a CPU architecture and run on another, or when specific combinations of CPU architecture and compiler are used. Invoke CMake with `-DNATIVE_COMPILATION=Off` and re-compile the code to fix the issue.
+* When compiling oxDNA with Python support on Microsoft's WSL, if the local repository is downloaded in Windows (*i.e.*, outside WSL), tests and analysis scripts may fail (see [this issue](https://github.com/lorenzo-rovigatti/oxDNA/issues/122#issue-2499923060)). To avoid these problems, clone the repository directly within the WSL environment.
 * A list of other known issues can be browsed online [here](https://github.com/lorenzo-rovigatti/oxDNA/issues).
 
 

From 51def3761097a399174cc487e361071d82a10c46 Mon Sep 17 00:00:00 2001
From: Lorenzo Rovigatti <lorenzo.rovigatti@gmail.com>
Date: Wed, 11 Sep 2024 14:52:51 +0200
Subject: [PATCH 2/4] Add more documentation (and debug output) related to
 `cells_auto_optimisation` (see #127)

---
 docs/source/performance.md | 30 ++++++++++++++++++++++++++++++
 src/CUDA/CUDAUtils.h       |  2 ++
 2 files changed, 32 insertions(+)

diff --git a/docs/source/performance.md b/docs/source/performance.md
index 70128ee9..f996d964 100644
--- a/docs/source/performance.md
+++ b/docs/source/performance.md
@@ -45,6 +45,36 @@ When running CUDA-powered simulations, the box size has a non-trivial effect on
 
 Since there is no dynamic memory on GPUs, in order to avoid crashing simulations oxDNA sets the size of the cells used to build neighbouring lists so that their memory footprint is not too high. If you want to optimise performance is sometimes worth to set `cells_auto_optimisation = false` so that oxDNA uses the smallest possible cells (at the cost of memory consumption). If the resulting memory footprint can be handled by your GPU you'll probably see some (possibly large) performance gains.
 
+There are some heuristics that attempt to limit the memory consumption of CUDA simulations. First of all, the given combination of parameters is used to evaluate the minimum size of the cells required to build neighbouring lists, $r_m$. In turn, $r_m$ is used to compute the number of cells along each coordinate $i$ (where $i = x, y, z$) as
+
+$$
+N_i = \max(\lfloor L_i / r_m \rfloor, 3),
+$$
+
+where $L_i$ is the length of the box edge along the $i$-th direction. This value of $N_i$ is the number of cells used for the simulation if `cells_auto_optimisation` is set to `false`. However, if it is set to `true`, which is the default, the code checks whether $N_i > \lceil f L_i \rceil$ and, if so, sets
+
+$$
+N_i = \lceil f L_i \rceil,
+$$
+
+where $N$ is the total number of particles and
+
+$$
+f = \left( \frac{2 N}{L_x L_y L_z} \right)^{1/3}.
+$$
+
+The maximum number of particles that are in each given cell, $M$, is another important parameter that can be, to some extent, tuned to avoid crashes. It is defined at the beginning of the simulation, and also each time the total number of cells changes while the simulation is running, as
+
+$$
+M = f_\rho M_\text{max},
+$$
+
+where $f_\rho$ is a factor that can be set with the `max_density_multiplier` option and defaults to 3, while $M_\text{max}$ is the number of particles found in the cell containing the largest amount of particles in the current configuration.
+
+:::{note}
+On newer versions of oxDNA (> 3.6.1), setting `debug = true` will report in the log file (or on screen if `log_file` is not set) the amount of memory that is requested by each allocation on the GPU.
+:::
+
 ## Monte Carlo
 
 When running Monte Carlo simulations the efficiency of the sampling depends on specific moves employed during the simulation. For regular Monte Carlo and VMMC simulations, the most important options are `delta_translation` and `delta_rotation`, which set the maximum displacement for translations and rotations. Optimal values depend very much on the system at hand, so it is hard to provide some guidelines, although often values around `0.1` given decent performance. Sometimes it may be worth to set [`adjust_moves = true` (together with `equilibration_steps > 0`)](input.md#monte-carlo-options) to let the code look for optimal values.
diff --git a/src/CUDA/CUDAUtils.h b/src/CUDA/CUDAUtils.h
index 9b9ae2fc..f677a827 100644
--- a/src/CUDA/CUDAUtils.h
+++ b/src/CUDA/CUDAUtils.h
@@ -89,6 +89,8 @@ class GpuUtils {
 
 template<typename T>
 cudaError_t GpuUtils::LR_cudaMalloc(T **devPtr, size_t size) {
+	OX_LOG(Logger::LOG_DEBUG, "Allocating %zu bytes (%.2lf MB) on the GPU", size, size / 1000000.0);
+	// size is a size_t (hence %zu above); it is added to the running device-memory total before cudaMalloc is called
 	GpuUtils::_allocated_dev_mem += size;
 	return cudaMalloc((void **) devPtr, size);
 }

From f42d61dee96205c888a240e9a510f1e5a40e6a6f Mon Sep 17 00:00:00 2001
From: Lorenzo Rovigatti <lorenzo.rovigatti@gmail.com>
Date: Tue, 17 Sep 2024 14:03:15 +0200
Subject: [PATCH 3/4] Make it possible to print the kinetic and total energies
 with any precision

---
 src/Observables/KineticEnergy.cpp | 2 +-
 src/Observables/TotalEnergy.cpp   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Observables/KineticEnergy.cpp b/src/Observables/KineticEnergy.cpp
index 876d9083..8fd7a9c7 100644
--- a/src/Observables/KineticEnergy.cpp
+++ b/src/Observables/KineticEnergy.cpp
@@ -52,5 +52,5 @@ number KineticEnergy::get_kinetic_energy() {
 std::string KineticEnergy::get_output_string(llint curr_step) {
 	number K = get_kinetic_energy();
 
-	return Utils::sformat("% 10.6lf", K);
+	return Utils::sformat(_number_formatter, K); // format K with _number_formatter rather than the former hard-coded "% 10.6lf"
 }
diff --git a/src/Observables/TotalEnergy.cpp b/src/Observables/TotalEnergy.cpp
index b3d4e6ee..df6fc8e2 100644
--- a/src/Observables/TotalEnergy.cpp
+++ b/src/Observables/TotalEnergy.cpp
@@ -33,7 +33,8 @@ std::string TotalEnergy::get_output_string(llint curr_step) {
 	number U = get_U(curr_step);
 	number K = get_K(curr_step);
 
-	return Utils::sformat("% 10.6lf % 10.6lf % 10.6lf", U, K, U + K);
+	std::string format = Utils::sformat("%s %s %s", _number_formatter.c_str(), _number_formatter.c_str(), _number_formatter.c_str()); // three space-separated copies of _number_formatter, one per printed value
+	return Utils::sformat(format, U, K, U + K); // printed values, in order: U, K, U + K
 }
 
 number TotalEnergy::get_U(llint curr_step) {

From 306a276c1831927749cabe5964f86e860286d209 Mon Sep 17 00:00:00 2001
From: Eryk Ratajczyk <77619269+eryykr@users.noreply.github.com>
Date: Sat, 21 Sep 2024 06:58:47 +0100
Subject: [PATCH 4/4] Make the NA interaction support custom bases (fix #129)

---
 src/Interactions/DRHInteraction.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/Interactions/DRHInteraction.cpp b/src/Interactions/DRHInteraction.cpp
index 55567f18..71e2ad9a 100644
--- a/src/Interactions/DRHInteraction.cpp
+++ b/src/Interactions/DRHInteraction.cpp
@@ -979,7 +979,16 @@ void DRHInteraction::allocate_particles(std::vector<BaseParticle*> &particles) {
 				}
 			}
 
-			int N_in_strand = sequence.size();
+			//Needed here to read custom bases
+			std::vector<int> btypes;
+			try {
+				btypes = Utils::btypes_from_sequence(sequence);
+			}
+			catch(oxDNAException &e) {
+				throw oxDNAException("topology file, strand %d (line %d): %s", ns, ns + 1, e.what());
+			}
+
+			int N_in_strand = btypes.size();
 				for(int i = 0; i < N_in_strand; i++, current_idx++) {
 					if(current_idx == parser.N()) {
 						throw oxDNAException("Too many particles found in the topology file (should be %d), aborting", parser.N());